diff --git "a/stage4_ckpt/trainer_state.json" "b/stage4_ckpt/trainer_state.json" new file mode 100644--- /dev/null +++ "b/stage4_ckpt/trainer_state.json" @@ -0,0 +1,28096 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.99656041275047, + "global_step": 46800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.4126, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.0084, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 0.9879, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 0.823, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.7661, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 0.8244, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 7e-05, + "loss": 0.7085, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 0.8008, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 9e-05, + "loss": 0.7042, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 0.698, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 9.997862334330911e-05, + "loss": 0.7638, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 9.995724668661823e-05, + "loss": 0.6782, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 9.993587002992733e-05, + "loss": 0.7541, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 9.991449337323642e-05, + "loss": 0.6904, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 9.989311671654553e-05, + "loss": 0.6709, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 9.987174005985465e-05, + "loss": 0.7488, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 9.985036340316375e-05, + "loss": 0.6562, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 9.982898674647285e-05, + "loss": 0.7306, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 9.980761008978197e-05, + "loss": 0.6881, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 9.978623343309106e-05, + "loss": 0.6585, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 9.976485677640017e-05, + "loss": 0.7287, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 9.974348011970929e-05, + "loss": 0.6576, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 9.972210346301839e-05, + "loss": 0.7271, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 9.97007268063275e-05, + "loss": 0.6686, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 9.96793501496366e-05, + "loss": 0.662, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 9.965797349294572e-05, + "loss": 0.7368, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 9.963659683625481e-05, + "loss": 0.6588, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 9.961522017956391e-05, + "loss": 0.7176, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 9.959384352287303e-05, + "loss": 0.6552, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 9.957246686618214e-05, + "loss": 0.6416, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 9.955109020949124e-05, + "loss": 0.7067, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 9.952971355280036e-05, + "loss": 0.653, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 9.950833689610945e-05, + "loss": 0.7157, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 9.948696023941855e-05, + "loss": 0.6628, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 9.946558358272766e-05, + "loss": 0.6371, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 9.944420692603678e-05, + "loss": 0.7153, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 9.942283026934588e-05, + "loss": 0.6461, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 9.940145361265499e-05, + "loss": 0.6987, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 9.938007695596409e-05, + "loss": 0.6507, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 9.93587002992732e-05, + "loss": 0.6305, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 9.93373236425823e-05, + "loss": 0.7125, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 9.93159469858914e-05, + "loss": 0.6369, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 9.929457032920052e-05, + "loss": 0.7018, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 9.927319367250963e-05, + "loss": 0.6541, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 9.925181701581873e-05, + "loss": 0.6331, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 9.923044035912783e-05, + "loss": 0.7067, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 9.920906370243694e-05, + "loss": 0.6293, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 9.918768704574604e-05, + "loss": 0.6981, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 9.916631038905516e-05, + "loss": 0.6445, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 9.914493373236427e-05, + "loss": 0.6381, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 9.912355707567337e-05, + "loss": 0.7076, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 9.910218041898248e-05, + "loss": 0.6407, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 9.908080376229158e-05, + "loss": 0.6953, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 9.905942710560068e-05, + "loss": 0.638, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 9.903805044890979e-05, + "loss": 0.6264, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 9.901667379221891e-05, + "loss": 0.7074, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 9.899529713552801e-05, + "loss": 0.6239, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 9.897392047883712e-05, + "loss": 0.6856, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 9.895254382214622e-05, + "loss": 0.6448, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 9.893116716545533e-05, + "loss": 0.6204, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 9.890979050876443e-05, + "loss": 0.6987, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 9.888841385207353e-05, + "loss": 0.6235, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 9.886703719538265e-05, + "loss": 0.6914, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 9.884566053869176e-05, + "loss": 0.6492, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 9.882428388200086e-05, + "loss": 0.6222, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 9.880290722530997e-05, + "loss": 0.7033, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 9.878153056861907e-05, + "loss": 0.618, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 9.876015391192817e-05, + "loss": 0.6892, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 9.873877725523728e-05, + "loss": 0.6397, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 9.87174005985464e-05, + "loss": 0.63, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 9.86960239418555e-05, + "loss": 0.6927, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 9.86746472851646e-05, + "loss": 0.626, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 9.865327062847371e-05, + "loss": 0.6833, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 9.863189397178282e-05, + "loss": 0.6318, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 9.861051731509192e-05, + "loss": 0.6204, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 9.858914065840104e-05, + "loss": 0.6885, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 9.856776400171014e-05, + "loss": 0.6276, + "step": 770 + }, + { + "epoch": 0.03, + "learning_rate": 9.854638734501925e-05, + "loss": 0.6788, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 9.852501068832835e-05, + "loss": 0.643, + "step": 790 + }, + { + "epoch": 0.03, + "learning_rate": 9.850363403163746e-05, + "loss": 0.6186, + "step": 800 + }, + { + "epoch": 0.03, + "learning_rate": 9.848225737494656e-05, + "loss": 0.6965, + "step": 810 + }, + { + "epoch": 0.03, + "learning_rate": 9.846088071825567e-05, + "loss": 0.6322, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 9.843950406156478e-05, + "loss": 0.6845, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 9.841812740487389e-05, + "loss": 0.6313, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 9.839675074818299e-05, + "loss": 0.6157, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 9.83753740914921e-05, + "loss": 0.691, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 9.83539974348012e-05, + "loss": 0.6233, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 9.83326207781103e-05, + "loss": 0.6836, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 9.831124412141941e-05, + "loss": 0.6226, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 9.828986746472853e-05, + "loss": 0.6184, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 9.826849080803763e-05, + "loss": 0.688, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 9.824711415134674e-05, + "loss": 0.6189, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 9.822573749465584e-05, + "loss": 0.6648, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 9.820436083796495e-05, + "loss": 0.6359, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 9.818298418127405e-05, + "loss": 0.6218, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 9.816160752458316e-05, + "loss": 0.6822, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 9.814023086789227e-05, + "loss": 0.6069, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 9.811885421120138e-05, + "loss": 0.6762, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 9.809747755451047e-05, + "loss": 0.6256, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 9.807610089781959e-05, + "loss": 0.6233, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 9.805472424112869e-05, + "loss": 0.686, + "step": 1010 + }, + { + "epoch": 0.04, + "learning_rate": 9.80333475844378e-05, + "loss": 0.6155, + "step": 1020 + }, + { + "epoch": 0.04, + "learning_rate": 9.801197092774691e-05, + "loss": 0.6799, + "step": 1030 + }, + { + "epoch": 0.04, + "learning_rate": 9.799059427105602e-05, + "loss": 0.6234, + "step": 1040 + }, + { + "epoch": 0.04, + "learning_rate": 9.796921761436512e-05, + "loss": 0.6027, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 9.794784095767421e-05, + "loss": 0.6891, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 9.792646430098333e-05, + "loss": 0.6229, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 9.790508764429244e-05, + "loss": 0.6803, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 9.788371098760154e-05, + "loss": 0.6208, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 9.786233433091066e-05, + "loss": 0.616, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 9.784095767421976e-05, + "loss": 0.689, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 9.781958101752885e-05, + "loss": 0.6064, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 9.779820436083797e-05, + "loss": 0.6776, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 9.777682770414708e-05, + "loss": 0.6283, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 9.775545104745618e-05, + "loss": 0.6218, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 9.773407439076529e-05, + "loss": 0.677, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 9.77126977340744e-05, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 9.76913210773835e-05, + "loss": 0.6822, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 9.76699444206926e-05, + "loss": 0.6278, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 9.764856776400172e-05, + "loss": 0.6112, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 9.762719110731082e-05, + "loss": 0.6853, + "step": 1210 + }, + { + "epoch": 0.05, + "learning_rate": 9.760581445061993e-05, + "loss": 0.6106, + "step": 1220 + }, + { + "epoch": 0.05, + "learning_rate": 9.758443779392903e-05, + "loss": 0.6801, + "step": 1230 + }, + { + "epoch": 0.05, + "learning_rate": 9.756306113723815e-05, + "loss": 0.6291, + "step": 1240 + }, + { + "epoch": 0.05, + "learning_rate": 9.754168448054724e-05, + "loss": 0.6107, + "step": 1250 + }, + { + "epoch": 0.05, + "learning_rate": 9.752030782385634e-05, + "loss": 0.6775, + "step": 1260 + }, + { + "epoch": 0.05, + "learning_rate": 9.749893116716546e-05, + "loss": 0.6075, + "step": 1270 + }, + { + "epoch": 0.05, + "learning_rate": 9.747755451047457e-05, + "loss": 0.6648, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 9.745617785378367e-05, + "loss": 0.6302, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 9.743480119709279e-05, + "loss": 0.6226, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 9.741342454040188e-05, + "loss": 0.6778, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 9.739204788371099e-05, + "loss": 0.6204, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 9.737067122702009e-05, + "loss": 0.673, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 9.734929457032921e-05, + "loss": 0.6294, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 9.732791791363831e-05, + "loss": 0.5981, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 9.730654125694742e-05, + "loss": 0.69, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 9.728516460025654e-05, + "loss": 0.6094, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 9.726378794356563e-05, + "loss": 0.6766, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 9.724241128687473e-05, + "loss": 0.6344, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 9.722103463018385e-05, + "loss": 0.5995, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 9.719965797349295e-05, + "loss": 0.6793, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 9.717828131680206e-05, + "loss": 0.6133, + "step": 1420 + }, + { + "epoch": 0.06, + "learning_rate": 9.715690466011116e-05, + "loss": 0.6735, + "step": 1430 + }, + { + "epoch": 0.06, + "learning_rate": 9.713552800342027e-05, + "loss": 0.6255, + "step": 1440 + }, + { + "epoch": 0.06, + "learning_rate": 9.711415134672937e-05, + "loss": 0.6047, + "step": 1450 + }, + { + "epoch": 0.06, + "learning_rate": 9.709277469003848e-05, + "loss": 0.6711, + "step": 1460 + }, + { + "epoch": 0.06, + "learning_rate": 9.70713980333476e-05, + "loss": 0.6094, + "step": 1470 + }, + { + "epoch": 0.06, + "learning_rate": 9.70500213766567e-05, + "loss": 0.6647, + "step": 1480 + }, + { + "epoch": 0.06, + "learning_rate": 9.70286447199658e-05, + "loss": 0.6187, + "step": 1490 + }, + { + "epoch": 0.06, + "learning_rate": 9.700726806327491e-05, + "loss": 0.6017, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 9.698589140658401e-05, + "loss": 0.6775, + "step": 1510 + }, + { + "epoch": 0.06, + "learning_rate": 9.696451474989312e-05, + "loss": 0.6178, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 9.694313809320222e-05, + "loss": 0.6752, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 9.692176143651134e-05, + "loss": 0.6096, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 9.690038477982044e-05, + "loss": 0.6106, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 9.687900812312955e-05, + "loss": 0.6788, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 9.685763146643865e-05, + "loss": 0.5967, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 9.683625480974776e-05, + "loss": 0.6676, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 9.681487815305686e-05, + "loss": 0.6145, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 9.679350149636597e-05, + "loss": 0.5979, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 9.677212483967508e-05, + "loss": 0.6844, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 9.675074818298419e-05, + "loss": 0.6105, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 9.672937152629329e-05, + "loss": 0.6555, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 9.67079948696024e-05, + "loss": 0.6192, + "step": 1640 + }, + { + "epoch": 0.07, + "learning_rate": 9.66866182129115e-05, + "loss": 0.6031, + "step": 1650 + }, + { + "epoch": 0.07, + "learning_rate": 9.66652415562206e-05, + "loss": 0.672, + "step": 1660 + }, + { + "epoch": 0.07, + "learning_rate": 9.664386489952972e-05, + "loss": 0.6023, + "step": 1670 + }, + { + "epoch": 0.07, + "learning_rate": 9.662248824283883e-05, + "loss": 0.6692, + "step": 1680 + }, + { + "epoch": 0.07, + "learning_rate": 9.660111158614793e-05, + "loss": 0.6254, + "step": 1690 + }, + { + "epoch": 0.07, + "learning_rate": 9.657973492945704e-05, + "loss": 0.606, + "step": 1700 + }, + { + "epoch": 0.07, + "learning_rate": 9.655835827276614e-05, + "loss": 0.6759, + "step": 1710 + }, + { + "epoch": 0.07, + "learning_rate": 9.653698161607525e-05, + "loss": 0.6125, + "step": 1720 + }, + { + "epoch": 0.07, + "learning_rate": 9.651560495938435e-05, + "loss": 0.6554, + "step": 1730 + }, + { + "epoch": 0.07, + "learning_rate": 9.649422830269347e-05, + "loss": 0.6149, + "step": 1740 + }, + { + "epoch": 0.07, + "learning_rate": 9.647285164600257e-05, + "loss": 0.603, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 9.645147498931168e-05, + "loss": 0.6846, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 9.643009833262078e-05, + "loss": 0.6086, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 9.640872167592989e-05, + "loss": 0.6677, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 9.638734501923899e-05, + "loss": 0.6114, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 9.63659683625481e-05, + "loss": 0.6115, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 9.634459170585721e-05, + "loss": 0.6847, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 9.632321504916632e-05, + "loss": 0.6067, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 9.630183839247542e-05, + "loss": 0.6537, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 9.628046173578453e-05, + "loss": 0.6172, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 9.625908507909363e-05, + "loss": 0.6026, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 9.623770842240274e-05, + "loss": 0.6696, + "step": 1860 + }, + { + "epoch": 0.08, + "learning_rate": 9.621633176571184e-05, + "loss": 0.6133, + "step": 1870 + }, + { + "epoch": 0.08, + "learning_rate": 9.619495510902096e-05, + "loss": 0.6602, + "step": 1880 + }, + { + "epoch": 0.08, + "learning_rate": 9.617357845233006e-05, + "loss": 0.61, + "step": 1890 + }, + { + "epoch": 0.08, + "learning_rate": 9.615220179563917e-05, + "loss": 0.6035, + "step": 1900 + }, + { + "epoch": 0.08, + "learning_rate": 9.613082513894827e-05, + "loss": 0.6798, + "step": 1910 + }, + { + "epoch": 0.08, + "learning_rate": 9.610944848225738e-05, + "loss": 0.6032, + "step": 1920 + }, + { + "epoch": 0.08, + "learning_rate": 9.608807182556648e-05, + "loss": 0.6576, + "step": 1930 + }, + { + "epoch": 0.08, + "learning_rate": 9.60666951688756e-05, + "loss": 0.6136, + "step": 1940 + }, + { + "epoch": 0.08, + "learning_rate": 9.60453185121847e-05, + "loss": 0.5976, + "step": 1950 + }, + { + "epoch": 0.08, + "learning_rate": 9.602394185549381e-05, + "loss": 0.6673, + "step": 1960 + }, + { + "epoch": 0.08, + "learning_rate": 9.60025651988029e-05, + "loss": 0.6047, + "step": 1970 + }, + { + "epoch": 0.08, + "learning_rate": 9.598118854211202e-05, + "loss": 0.6624, + "step": 1980 + }, + { + "epoch": 0.08, + "learning_rate": 9.595981188542112e-05, + "loss": 0.6161, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 9.593843522873023e-05, + "loss": 0.5997, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 9.591705857203935e-05, + "loss": 0.6689, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 9.589568191534845e-05, + "loss": 0.6012, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 9.587430525865755e-05, + "loss": 0.6641, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 9.585292860196665e-05, + "loss": 0.6058, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 9.583155194527576e-05, + "loss": 0.6037, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 9.581017528858487e-05, + "loss": 0.6592, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 9.578879863189397e-05, + "loss": 0.6061, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 9.576742197520309e-05, + "loss": 0.6532, + "step": 2080 + }, + { + "epoch": 0.09, + "learning_rate": 9.57460453185122e-05, + "loss": 0.6204, + "step": 2090 + }, + { + "epoch": 0.09, + "learning_rate": 9.572466866182129e-05, + "loss": 0.5996, + "step": 2100 + }, + { + "epoch": 0.09, + "learning_rate": 9.57032920051304e-05, + "loss": 0.6717, + "step": 2110 + }, + { + "epoch": 0.09, + "learning_rate": 9.568191534843951e-05, + "loss": 0.606, + "step": 2120 + }, + { + "epoch": 0.09, + "learning_rate": 9.566053869174861e-05, + "loss": 0.6578, + "step": 2130 + }, + { + "epoch": 0.09, + "learning_rate": 9.563916203505772e-05, + "loss": 0.6095, + "step": 2140 + }, + { + "epoch": 0.09, + "learning_rate": 9.561778537836684e-05, + "loss": 0.6019, + "step": 2150 + }, + { + "epoch": 0.09, + "learning_rate": 9.559640872167594e-05, + "loss": 0.6782, + "step": 2160 + }, + { + "epoch": 0.09, + "learning_rate": 9.557503206498503e-05, + "loss": 0.5977, + "step": 2170 + }, + { + "epoch": 0.09, + "learning_rate": 9.555365540829415e-05, + "loss": 0.6533, + "step": 2180 + }, + { + "epoch": 0.09, + "learning_rate": 9.553227875160325e-05, + "loss": 0.6116, + "step": 2190 + }, + { + "epoch": 0.09, + "learning_rate": 9.551090209491236e-05, + "loss": 0.5907, + "step": 2200 + }, + { + "epoch": 0.09, + "learning_rate": 9.548952543822148e-05, + "loss": 0.6669, + "step": 2210 + }, + { + "epoch": 0.09, + "learning_rate": 9.546814878153058e-05, + "loss": 0.5951, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 9.544677212483967e-05, + "loss": 0.6599, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 9.542539546814878e-05, + "loss": 0.6153, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 9.54040188114579e-05, + "loss": 0.6057, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 9.5382642154767e-05, + "loss": 0.6661, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 9.53612654980761e-05, + "loss": 0.5933, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 9.533988884138522e-05, + "loss": 0.6529, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 9.531851218469433e-05, + "loss": 0.6165, + "step": 2290 + }, + { + "epoch": 0.1, + "learning_rate": 9.529713552800342e-05, + "loss": 0.5967, + "step": 2300 + }, + { + "epoch": 0.1, + "learning_rate": 9.527575887131252e-05, + "loss": 0.6707, + "step": 2310 + }, + { + "epoch": 0.1, + "learning_rate": 9.525438221462164e-05, + "loss": 0.6014, + "step": 2320 + }, + { + "epoch": 0.1, + "learning_rate": 9.523300555793074e-05, + "loss": 0.6573, + "step": 2330 + }, + { + "epoch": 0.1, + "learning_rate": 9.521162890123985e-05, + "loss": 0.6105, + "step": 2340 + }, + { + "epoch": 0.1, + "learning_rate": 9.519025224454897e-05, + "loss": 0.5912, + "step": 2350 + }, + { + "epoch": 0.1, + "learning_rate": 9.516887558785806e-05, + "loss": 0.6606, + "step": 2360 + }, + { + "epoch": 0.1, + "learning_rate": 9.514749893116716e-05, + "loss": 0.5957, + "step": 2370 + }, + { + "epoch": 0.1, + "learning_rate": 9.512612227447628e-05, + "loss": 0.6523, + "step": 2380 + }, + { + "epoch": 0.1, + "learning_rate": 9.510474561778538e-05, + "loss": 0.6125, + "step": 2390 + }, + { + "epoch": 0.1, + "learning_rate": 9.508336896109449e-05, + "loss": 0.5934, + "step": 2400 + }, + { + "epoch": 0.1, + "learning_rate": 9.50619923044036e-05, + "loss": 0.6668, + "step": 2410 + }, + { + "epoch": 0.1, + "learning_rate": 9.50406156477127e-05, + "loss": 0.6003, + "step": 2420 + }, + { + "epoch": 0.1, + "learning_rate": 9.50192389910218e-05, + "loss": 0.6554, + "step": 2430 + }, + { + "epoch": 0.1, + "learning_rate": 9.499786233433091e-05, + "loss": 0.6118, + "step": 2440 + }, + { + "epoch": 0.1, + "learning_rate": 9.497648567764003e-05, + "loss": 0.5913, + "step": 2450 + }, + { + "epoch": 0.1, + "learning_rate": 9.495510902094913e-05, + "loss": 0.6718, + "step": 2460 + }, + { + "epoch": 0.11, + "learning_rate": 9.493373236425823e-05, + "loss": 0.601, + "step": 2470 + }, + { + "epoch": 0.11, + "learning_rate": 9.491235570756735e-05, + "loss": 0.658, + "step": 2480 + }, + { + "epoch": 0.11, + "learning_rate": 9.489097905087644e-05, + "loss": 0.6048, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 9.486960239418555e-05, + "loss": 0.5941, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 9.484822573749465e-05, + "loss": 0.661, + "step": 2510 + }, + { + "epoch": 0.11, + "learning_rate": 9.482684908080377e-05, + "loss": 0.5939, + "step": 2520 + }, + { + "epoch": 0.11, + "learning_rate": 9.480547242411288e-05, + "loss": 0.663, + "step": 2530 + }, + { + "epoch": 0.11, + "learning_rate": 9.478409576742198e-05, + "loss": 0.6024, + "step": 2540 + }, + { + "epoch": 0.11, + "learning_rate": 9.476271911073108e-05, + "loss": 0.5997, + "step": 2550 + }, + { + "epoch": 0.11, + "learning_rate": 9.474134245404019e-05, + "loss": 0.6667, + "step": 2560 + }, + { + "epoch": 0.11, + "learning_rate": 9.47199657973493e-05, + "loss": 0.5929, + "step": 2570 + }, + { + "epoch": 0.11, + "learning_rate": 9.46985891406584e-05, + "loss": 0.656, + "step": 2580 + }, + { + "epoch": 0.11, + "learning_rate": 9.467721248396752e-05, + "loss": 0.603, + "step": 2590 + }, + { + "epoch": 0.11, + "learning_rate": 9.465583582727662e-05, + "loss": 0.587, + "step": 2600 + }, + { + "epoch": 0.11, + "learning_rate": 9.463445917058572e-05, + "loss": 0.6677, + "step": 2610 + }, + { + "epoch": 0.11, + "learning_rate": 9.461308251389483e-05, + "loss": 0.5948, + "step": 2620 + }, + { + "epoch": 0.11, + "learning_rate": 9.459170585720393e-05, + "loss": 0.6529, + "step": 2630 + }, + { + "epoch": 0.11, + "learning_rate": 9.457032920051304e-05, + "loss": 0.614, + "step": 2640 + }, + { + "epoch": 0.11, + "learning_rate": 9.454895254382216e-05, + "loss": 0.5877, + "step": 2650 + }, + { + "epoch": 0.11, + "learning_rate": 9.452757588713126e-05, + "loss": 0.6607, + "step": 2660 + }, + { + "epoch": 0.11, + "learning_rate": 9.450619923044037e-05, + "loss": 0.5967, + "step": 2670 + }, + { + "epoch": 0.11, + "learning_rate": 9.448482257374947e-05, + "loss": 0.6532, + "step": 2680 + }, + { + "epoch": 0.11, + "learning_rate": 9.446344591705857e-05, + "loss": 0.6089, + "step": 2690 + }, + { + "epoch": 0.12, + "learning_rate": 9.444206926036768e-05, + "loss": 0.5906, + "step": 2700 + }, + { + "epoch": 0.12, + "learning_rate": 9.442069260367678e-05, + "loss": 0.6591, + "step": 2710 + }, + { + "epoch": 0.12, + "learning_rate": 9.43993159469859e-05, + "loss": 0.5896, + "step": 2720 + }, + { + "epoch": 0.12, + "learning_rate": 9.4377939290295e-05, + "loss": 0.6447, + "step": 2730 + }, + { + "epoch": 0.12, + "learning_rate": 9.435656263360411e-05, + "loss": 0.6124, + "step": 2740 + }, + { + "epoch": 0.12, + "learning_rate": 9.433518597691322e-05, + "loss": 0.5872, + "step": 2750 + }, + { + "epoch": 0.12, + "learning_rate": 9.431380932022232e-05, + "loss": 0.6651, + "step": 2760 + }, + { + "epoch": 0.12, + "learning_rate": 9.429243266353142e-05, + "loss": 0.6045, + "step": 2770 + }, + { + "epoch": 0.12, + "learning_rate": 9.427105600684053e-05, + "loss": 0.6543, + "step": 2780 + }, + { + "epoch": 0.12, + "learning_rate": 9.424967935014965e-05, + "loss": 0.6058, + "step": 2790 + }, + { + "epoch": 0.12, + "learning_rate": 9.422830269345875e-05, + "loss": 0.5967, + "step": 2800 + }, + { + "epoch": 0.12, + "learning_rate": 9.420692603676786e-05, + "loss": 0.6558, + "step": 2810 + }, + { + "epoch": 0.12, + "learning_rate": 9.418554938007696e-05, + "loss": 0.5979, + "step": 2820 + }, + { + "epoch": 0.12, + "learning_rate": 9.416417272338606e-05, + "loss": 0.6561, + "step": 2830 + }, + { + "epoch": 0.12, + "learning_rate": 9.414279606669517e-05, + "loss": 0.6006, + "step": 2840 + }, + { + "epoch": 0.12, + "learning_rate": 9.412141941000427e-05, + "loss": 0.5965, + "step": 2850 + }, + { + "epoch": 0.12, + "learning_rate": 9.410004275331339e-05, + "loss": 0.6693, + "step": 2860 + }, + { + "epoch": 0.12, + "learning_rate": 9.40786660966225e-05, + "loss": 0.5925, + "step": 2870 + }, + { + "epoch": 0.12, + "learning_rate": 9.40572894399316e-05, + "loss": 0.6511, + "step": 2880 + }, + { + "epoch": 0.12, + "learning_rate": 9.40359127832407e-05, + "loss": 0.6053, + "step": 2890 + }, + { + "epoch": 0.12, + "learning_rate": 9.401453612654981e-05, + "loss": 0.5906, + "step": 2900 + }, + { + "epoch": 0.12, + "learning_rate": 9.399315946985891e-05, + "loss": 0.6595, + "step": 2910 + }, + { + "epoch": 0.12, + "learning_rate": 9.397178281316803e-05, + "loss": 0.5942, + "step": 2920 + }, + { + "epoch": 0.12, + "learning_rate": 9.395040615647714e-05, + "loss": 0.6487, + "step": 2930 + }, + { + "epoch": 0.13, + "learning_rate": 9.392902949978624e-05, + "loss": 0.6089, + "step": 2940 + }, + { + "epoch": 0.13, + "learning_rate": 9.390765284309535e-05, + "loss": 0.593, + "step": 2950 + }, + { + "epoch": 0.13, + "learning_rate": 9.388627618640445e-05, + "loss": 0.6626, + "step": 2960 + }, + { + "epoch": 0.13, + "learning_rate": 9.386489952971356e-05, + "loss": 0.5995, + "step": 2970 + }, + { + "epoch": 0.13, + "learning_rate": 9.384352287302266e-05, + "loss": 0.6542, + "step": 2980 + }, + { + "epoch": 0.13, + "learning_rate": 9.382214621633178e-05, + "loss": 0.6091, + "step": 2990 + }, + { + "epoch": 0.13, + "learning_rate": 9.380076955964088e-05, + "loss": 0.584, + "step": 3000 + }, + { + "epoch": 0.13, + "learning_rate": 9.377939290294999e-05, + "loss": 0.6548, + "step": 3010 + }, + { + "epoch": 0.13, + "learning_rate": 9.375801624625909e-05, + "loss": 0.5847, + "step": 3020 + }, + { + "epoch": 0.13, + "learning_rate": 9.37366395895682e-05, + "loss": 0.6549, + "step": 3030 + }, + { + "epoch": 0.13, + "learning_rate": 9.37152629328773e-05, + "loss": 0.6026, + "step": 3040 + }, + { + "epoch": 0.13, + "learning_rate": 9.36938862761864e-05, + "loss": 0.5814, + "step": 3050 + }, + { + "epoch": 0.13, + "learning_rate": 9.367250961949552e-05, + "loss": 0.6578, + "step": 3060 + }, + { + "epoch": 0.13, + "learning_rate": 9.365113296280463e-05, + "loss": 0.59, + "step": 3070 + }, + { + "epoch": 0.13, + "learning_rate": 9.362975630611373e-05, + "loss": 0.6517, + "step": 3080 + }, + { + "epoch": 0.13, + "learning_rate": 9.360837964942284e-05, + "loss": 0.6062, + "step": 3090 + }, + { + "epoch": 0.13, + "learning_rate": 9.358700299273194e-05, + "loss": 0.5843, + "step": 3100 + }, + { + "epoch": 0.13, + "learning_rate": 9.356562633604105e-05, + "loss": 0.657, + "step": 3110 + }, + { + "epoch": 0.13, + "learning_rate": 9.354424967935015e-05, + "loss": 0.5895, + "step": 3120 + }, + { + "epoch": 0.13, + "learning_rate": 9.352287302265927e-05, + "loss": 0.6563, + "step": 3130 + }, + { + "epoch": 0.13, + "learning_rate": 9.350149636596837e-05, + "loss": 0.5949, + "step": 3140 + }, + { + "epoch": 0.13, + "learning_rate": 9.348011970927746e-05, + "loss": 0.5849, + "step": 3150 + }, + { + "epoch": 0.13, + "learning_rate": 9.345874305258658e-05, + "loss": 0.6568, + "step": 3160 + }, + { + "epoch": 0.14, + "learning_rate": 9.343736639589569e-05, + "loss": 0.5969, + "step": 3170 + }, + { + "epoch": 0.14, + "learning_rate": 9.341598973920479e-05, + "loss": 0.6429, + "step": 3180 + }, + { + "epoch": 0.14, + "learning_rate": 9.339461308251391e-05, + "loss": 0.6022, + "step": 3190 + }, + { + "epoch": 0.14, + "learning_rate": 9.337323642582301e-05, + "loss": 0.5984, + "step": 3200 + }, + { + "epoch": 0.14, + "learning_rate": 9.33518597691321e-05, + "loss": 0.6602, + "step": 3210 + }, + { + "epoch": 0.14, + "learning_rate": 9.333048311244121e-05, + "loss": 0.5919, + "step": 3220 + }, + { + "epoch": 0.14, + "learning_rate": 9.330910645575033e-05, + "loss": 0.6588, + "step": 3230 + }, + { + "epoch": 0.14, + "learning_rate": 9.328772979905943e-05, + "loss": 0.5954, + "step": 3240 + }, + { + "epoch": 0.14, + "learning_rate": 9.326635314236854e-05, + "loss": 0.5893, + "step": 3250 + }, + { + "epoch": 0.14, + "learning_rate": 9.324497648567765e-05, + "loss": 0.657, + "step": 3260 + }, + { + "epoch": 0.14, + "learning_rate": 9.322359982898676e-05, + "loss": 0.5842, + "step": 3270 + }, + { + "epoch": 0.14, + "learning_rate": 9.320222317229585e-05, + "loss": 0.6536, + "step": 3280 + }, + { + "epoch": 0.14, + "learning_rate": 9.318084651560497e-05, + "loss": 0.6089, + "step": 3290 + }, + { + "epoch": 0.14, + "learning_rate": 9.315946985891407e-05, + "loss": 0.5822, + "step": 3300 + }, + { + "epoch": 0.14, + "learning_rate": 9.313809320222318e-05, + "loss": 0.6519, + "step": 3310 + }, + { + "epoch": 0.14, + "learning_rate": 9.311671654553228e-05, + "loss": 0.5863, + "step": 3320 + }, + { + "epoch": 0.14, + "learning_rate": 9.30953398888414e-05, + "loss": 0.6494, + "step": 3330 + }, + { + "epoch": 0.14, + "learning_rate": 9.307396323215049e-05, + "loss": 0.6011, + "step": 3340 + }, + { + "epoch": 0.14, + "learning_rate": 9.30525865754596e-05, + "loss": 0.5921, + "step": 3350 + }, + { + "epoch": 0.14, + "learning_rate": 9.303120991876871e-05, + "loss": 0.664, + "step": 3360 + }, + { + "epoch": 0.14, + "learning_rate": 9.300983326207782e-05, + "loss": 0.5861, + "step": 3370 + }, + { + "epoch": 0.14, + "learning_rate": 9.298845660538692e-05, + "loss": 0.6516, + "step": 3380 + }, + { + "epoch": 0.14, + "learning_rate": 9.296707994869603e-05, + "loss": 0.6073, + "step": 3390 + }, + { + "epoch": 0.15, + "learning_rate": 9.294570329200514e-05, + "loss": 0.5918, + "step": 3400 + }, + { + "epoch": 0.15, + "learning_rate": 9.292432663531423e-05, + "loss": 0.6567, + "step": 3410 + }, + { + "epoch": 0.15, + "learning_rate": 9.290294997862334e-05, + "loss": 0.5793, + "step": 3420 + }, + { + "epoch": 0.15, + "learning_rate": 9.288157332193246e-05, + "loss": 0.6401, + "step": 3430 + }, + { + "epoch": 0.15, + "learning_rate": 9.286019666524156e-05, + "loss": 0.5984, + "step": 3440 + }, + { + "epoch": 0.15, + "learning_rate": 9.283882000855067e-05, + "loss": 0.5841, + "step": 3450 + }, + { + "epoch": 0.15, + "learning_rate": 9.281744335185978e-05, + "loss": 0.6481, + "step": 3460 + }, + { + "epoch": 0.15, + "learning_rate": 9.279606669516888e-05, + "loss": 0.5909, + "step": 3470 + }, + { + "epoch": 0.15, + "learning_rate": 9.277469003847798e-05, + "loss": 0.6462, + "step": 3480 + }, + { + "epoch": 0.15, + "learning_rate": 9.275331338178708e-05, + "loss": 0.5925, + "step": 3490 + }, + { + "epoch": 0.15, + "learning_rate": 9.27319367250962e-05, + "loss": 0.5825, + "step": 3500 + }, + { + "epoch": 0.15, + "learning_rate": 9.271056006840531e-05, + "loss": 0.6616, + "step": 3510 + }, + { + "epoch": 0.15, + "learning_rate": 9.268918341171441e-05, + "loss": 0.5844, + "step": 3520 + }, + { + "epoch": 0.15, + "learning_rate": 9.266780675502353e-05, + "loss": 0.6455, + "step": 3530 + }, + { + "epoch": 0.15, + "learning_rate": 9.264643009833262e-05, + "loss": 0.5955, + "step": 3540 + }, + { + "epoch": 0.15, + "learning_rate": 9.262505344164173e-05, + "loss": 0.5768, + "step": 3550 + }, + { + "epoch": 0.15, + "learning_rate": 9.260367678495084e-05, + "loss": 0.6591, + "step": 3560 + }, + { + "epoch": 0.15, + "learning_rate": 9.258230012825995e-05, + "loss": 0.5865, + "step": 3570 + }, + { + "epoch": 0.15, + "learning_rate": 9.256092347156905e-05, + "loss": 0.6469, + "step": 3580 + }, + { + "epoch": 0.15, + "learning_rate": 9.253954681487816e-05, + "loss": 0.605, + "step": 3590 + }, + { + "epoch": 0.15, + "learning_rate": 9.251817015818726e-05, + "loss": 0.5949, + "step": 3600 + }, + { + "epoch": 0.15, + "learning_rate": 9.249679350149637e-05, + "loss": 0.6615, + "step": 3610 + }, + { + "epoch": 0.15, + "learning_rate": 9.247541684480547e-05, + "loss": 0.5788, + "step": 3620 + }, + { + "epoch": 0.15, + "learning_rate": 9.245404018811459e-05, + "loss": 0.6447, + "step": 3630 + }, + { + "epoch": 0.16, + "learning_rate": 9.243266353142369e-05, + "loss": 0.5975, + "step": 3640 + }, + { + "epoch": 0.16, + "learning_rate": 9.24112868747328e-05, + "loss": 0.5839, + "step": 3650 + }, + { + "epoch": 0.16, + "learning_rate": 9.23899102180419e-05, + "loss": 0.6566, + "step": 3660 + }, + { + "epoch": 0.16, + "learning_rate": 9.2368533561351e-05, + "loss": 0.5788, + "step": 3670 + }, + { + "epoch": 0.16, + "learning_rate": 9.234715690466011e-05, + "loss": 0.6503, + "step": 3680 + }, + { + "epoch": 0.16, + "learning_rate": 9.232578024796922e-05, + "loss": 0.6127, + "step": 3690 + }, + { + "epoch": 0.16, + "learning_rate": 9.230440359127833e-05, + "loss": 0.5776, + "step": 3700 + }, + { + "epoch": 0.16, + "learning_rate": 9.228302693458744e-05, + "loss": 0.6515, + "step": 3710 + }, + { + "epoch": 0.16, + "learning_rate": 9.226165027789654e-05, + "loss": 0.576, + "step": 3720 + }, + { + "epoch": 0.16, + "learning_rate": 9.224027362120565e-05, + "loss": 0.6421, + "step": 3730 + }, + { + "epoch": 0.16, + "learning_rate": 9.221889696451475e-05, + "loss": 0.5975, + "step": 3740 + }, + { + "epoch": 0.16, + "learning_rate": 9.219752030782386e-05, + "loss": 0.5839, + "step": 3750 + }, + { + "epoch": 0.16, + "learning_rate": 9.217614365113296e-05, + "loss": 0.6532, + "step": 3760 + }, + { + "epoch": 0.16, + "learning_rate": 9.215476699444208e-05, + "loss": 0.5793, + "step": 3770 + }, + { + "epoch": 0.16, + "learning_rate": 9.213339033775118e-05, + "loss": 0.6487, + "step": 3780 + }, + { + "epoch": 0.16, + "learning_rate": 9.211201368106029e-05, + "loss": 0.5978, + "step": 3790 + }, + { + "epoch": 0.16, + "learning_rate": 9.209063702436939e-05, + "loss": 0.5986, + "step": 3800 + }, + { + "epoch": 0.16, + "learning_rate": 9.20692603676785e-05, + "loss": 0.6474, + "step": 3810 + }, + { + "epoch": 0.16, + "learning_rate": 9.20478837109876e-05, + "loss": 0.5892, + "step": 3820 + }, + { + "epoch": 0.16, + "learning_rate": 9.202650705429672e-05, + "loss": 0.6548, + "step": 3830 + }, + { + "epoch": 0.16, + "learning_rate": 9.200513039760582e-05, + "loss": 0.589, + "step": 3840 + }, + { + "epoch": 0.16, + "learning_rate": 9.198375374091493e-05, + "loss": 0.5966, + "step": 3850 + }, + { + "epoch": 0.16, + "learning_rate": 9.196237708422403e-05, + "loss": 0.6635, + "step": 3860 + }, + { + "epoch": 0.17, + "learning_rate": 9.194100042753314e-05, + "loss": 0.5753, + "step": 3870 + }, + { + "epoch": 0.17, + "learning_rate": 9.191962377084224e-05, + "loss": 0.6437, + "step": 3880 + }, + { + "epoch": 0.17, + "learning_rate": 9.189824711415135e-05, + "loss": 0.6072, + "step": 3890 + }, + { + "epoch": 0.17, + "learning_rate": 9.187687045746046e-05, + "loss": 0.5745, + "step": 3900 + }, + { + "epoch": 0.17, + "learning_rate": 9.185549380076957e-05, + "loss": 0.6521, + "step": 3910 + }, + { + "epoch": 0.17, + "learning_rate": 9.183411714407867e-05, + "loss": 0.5728, + "step": 3920 + }, + { + "epoch": 0.17, + "learning_rate": 9.181274048738778e-05, + "loss": 0.6489, + "step": 3930 + }, + { + "epoch": 0.17, + "learning_rate": 9.179136383069688e-05, + "loss": 0.6041, + "step": 3940 + }, + { + "epoch": 0.17, + "learning_rate": 9.176998717400599e-05, + "loss": 0.5781, + "step": 3950 + }, + { + "epoch": 0.17, + "learning_rate": 9.174861051731509e-05, + "loss": 0.6585, + "step": 3960 + }, + { + "epoch": 0.17, + "learning_rate": 9.172723386062421e-05, + "loss": 0.5896, + "step": 3970 + }, + { + "epoch": 0.17, + "learning_rate": 9.170585720393331e-05, + "loss": 0.6493, + "step": 3980 + }, + { + "epoch": 0.17, + "learning_rate": 9.168448054724242e-05, + "loss": 0.5997, + "step": 3990 + }, + { + "epoch": 0.17, + "learning_rate": 9.166310389055152e-05, + "loss": 0.5892, + "step": 4000 + }, + { + "epoch": 0.17, + "learning_rate": 9.164172723386063e-05, + "loss": 0.6706, + "step": 4010 + }, + { + "epoch": 0.17, + "learning_rate": 9.162035057716973e-05, + "loss": 0.5901, + "step": 4020 + }, + { + "epoch": 0.17, + "learning_rate": 9.159897392047884e-05, + "loss": 0.647, + "step": 4030 + }, + { + "epoch": 0.17, + "learning_rate": 9.157759726378795e-05, + "loss": 0.5852, + "step": 4040 + }, + { + "epoch": 0.17, + "learning_rate": 9.155622060709706e-05, + "loss": 0.5794, + "step": 4050 + }, + { + "epoch": 0.17, + "learning_rate": 9.153484395040616e-05, + "loss": 0.6538, + "step": 4060 + }, + { + "epoch": 0.17, + "learning_rate": 9.151346729371527e-05, + "loss": 0.5815, + "step": 4070 + }, + { + "epoch": 0.17, + "learning_rate": 9.149209063702437e-05, + "loss": 0.6542, + "step": 4080 + }, + { + "epoch": 0.17, + "learning_rate": 9.147071398033348e-05, + "loss": 0.6025, + "step": 4090 + }, + { + "epoch": 0.17, + "learning_rate": 9.14493373236426e-05, + "loss": 0.5771, + "step": 4100 + }, + { + "epoch": 0.18, + "learning_rate": 9.14279606669517e-05, + "loss": 0.6488, + "step": 4110 + }, + { + "epoch": 0.18, + "learning_rate": 9.14065840102608e-05, + "loss": 0.573, + "step": 4120 + }, + { + "epoch": 0.18, + "learning_rate": 9.13852073535699e-05, + "loss": 0.6592, + "step": 4130 + }, + { + "epoch": 0.18, + "learning_rate": 9.136383069687901e-05, + "loss": 0.5861, + "step": 4140 + }, + { + "epoch": 0.18, + "learning_rate": 9.134245404018812e-05, + "loss": 0.5789, + "step": 4150 + }, + { + "epoch": 0.18, + "learning_rate": 9.132107738349722e-05, + "loss": 0.6573, + "step": 4160 + }, + { + "epoch": 0.18, + "learning_rate": 9.129970072680634e-05, + "loss": 0.5851, + "step": 4170 + }, + { + "epoch": 0.18, + "learning_rate": 9.127832407011544e-05, + "loss": 0.649, + "step": 4180 + }, + { + "epoch": 0.18, + "learning_rate": 9.125694741342455e-05, + "loss": 0.6037, + "step": 4190 + }, + { + "epoch": 0.18, + "learning_rate": 9.123557075673364e-05, + "loss": 0.5902, + "step": 4200 + }, + { + "epoch": 0.18, + "learning_rate": 9.121419410004276e-05, + "loss": 0.6592, + "step": 4210 + }, + { + "epoch": 0.18, + "learning_rate": 9.119281744335186e-05, + "loss": 0.582, + "step": 4220 + }, + { + "epoch": 0.18, + "learning_rate": 9.117144078666097e-05, + "loss": 0.6449, + "step": 4230 + }, + { + "epoch": 0.18, + "learning_rate": 9.115006412997009e-05, + "loss": 0.6057, + "step": 4240 + }, + { + "epoch": 0.18, + "learning_rate": 9.112868747327919e-05, + "loss": 0.5951, + "step": 4250 + }, + { + "epoch": 0.18, + "learning_rate": 9.110731081658828e-05, + "loss": 0.662, + "step": 4260 + }, + { + "epoch": 0.18, + "learning_rate": 9.10859341598974e-05, + "loss": 0.5941, + "step": 4270 + }, + { + "epoch": 0.18, + "learning_rate": 9.10645575032065e-05, + "loss": 0.6517, + "step": 4280 + }, + { + "epoch": 0.18, + "learning_rate": 9.104318084651561e-05, + "loss": 0.5868, + "step": 4290 + }, + { + "epoch": 0.18, + "learning_rate": 9.102180418982471e-05, + "loss": 0.5859, + "step": 4300 + }, + { + "epoch": 0.18, + "learning_rate": 9.100042753313383e-05, + "loss": 0.6494, + "step": 4310 + }, + { + "epoch": 0.18, + "learning_rate": 9.097905087644294e-05, + "loss": 0.5869, + "step": 4320 + }, + { + "epoch": 0.18, + "learning_rate": 9.095767421975203e-05, + "loss": 0.6384, + "step": 4330 + }, + { + "epoch": 0.19, + "learning_rate": 9.093629756306114e-05, + "loss": 0.5882, + "step": 4340 + }, + { + "epoch": 0.19, + "learning_rate": 9.091492090637025e-05, + "loss": 0.5833, + "step": 4350 + }, + { + "epoch": 0.19, + "learning_rate": 9.089354424967935e-05, + "loss": 0.6447, + "step": 4360 + }, + { + "epoch": 0.19, + "learning_rate": 9.087216759298847e-05, + "loss": 0.5842, + "step": 4370 + }, + { + "epoch": 0.19, + "learning_rate": 9.085079093629758e-05, + "loss": 0.6514, + "step": 4380 + }, + { + "epoch": 0.19, + "learning_rate": 9.082941427960667e-05, + "loss": 0.5904, + "step": 4390 + }, + { + "epoch": 0.19, + "learning_rate": 9.080803762291577e-05, + "loss": 0.5752, + "step": 4400 + }, + { + "epoch": 0.19, + "learning_rate": 9.078666096622489e-05, + "loss": 0.6477, + "step": 4410 + }, + { + "epoch": 0.19, + "learning_rate": 9.0765284309534e-05, + "loss": 0.5775, + "step": 4420 + }, + { + "epoch": 0.19, + "learning_rate": 9.07439076528431e-05, + "loss": 0.6417, + "step": 4430 + }, + { + "epoch": 0.19, + "learning_rate": 9.072253099615222e-05, + "loss": 0.5974, + "step": 4440 + }, + { + "epoch": 0.19, + "learning_rate": 9.070115433946131e-05, + "loss": 0.5723, + "step": 4450 + }, + { + "epoch": 0.19, + "learning_rate": 9.067977768277041e-05, + "loss": 0.6443, + "step": 4460 + }, + { + "epoch": 0.19, + "learning_rate": 9.065840102607952e-05, + "loss": 0.5811, + "step": 4470 + }, + { + "epoch": 0.19, + "learning_rate": 9.063702436938863e-05, + "loss": 0.6378, + "step": 4480 + }, + { + "epoch": 0.19, + "learning_rate": 9.061564771269774e-05, + "loss": 0.5967, + "step": 4490 + }, + { + "epoch": 0.19, + "learning_rate": 9.059427105600684e-05, + "loss": 0.5773, + "step": 4500 + }, + { + "epoch": 0.19, + "learning_rate": 9.057289439931596e-05, + "loss": 0.6508, + "step": 4510 + }, + { + "epoch": 0.19, + "learning_rate": 9.055151774262505e-05, + "loss": 0.582, + "step": 4520 + }, + { + "epoch": 0.19, + "learning_rate": 9.053014108593416e-05, + "loss": 0.6422, + "step": 4530 + }, + { + "epoch": 0.19, + "learning_rate": 9.050876442924327e-05, + "loss": 0.5941, + "step": 4540 + }, + { + "epoch": 0.19, + "learning_rate": 9.048738777255238e-05, + "loss": 0.5738, + "step": 4550 + }, + { + "epoch": 0.19, + "learning_rate": 9.046601111586148e-05, + "loss": 0.6496, + "step": 4560 + }, + { + "epoch": 0.19, + "learning_rate": 9.044463445917059e-05, + "loss": 0.5751, + "step": 4570 + }, + { + "epoch": 0.2, + "learning_rate": 9.042325780247969e-05, + "loss": 0.6402, + "step": 4580 + }, + { + "epoch": 0.2, + "learning_rate": 9.04018811457888e-05, + "loss": 0.5927, + "step": 4590 + }, + { + "epoch": 0.2, + "learning_rate": 9.03805044890979e-05, + "loss": 0.5769, + "step": 4600 + }, + { + "epoch": 0.2, + "learning_rate": 9.035912783240702e-05, + "loss": 0.6474, + "step": 4610 + }, + { + "epoch": 0.2, + "learning_rate": 9.033775117571612e-05, + "loss": 0.5871, + "step": 4620 + }, + { + "epoch": 0.2, + "learning_rate": 9.031637451902523e-05, + "loss": 0.6459, + "step": 4630 + }, + { + "epoch": 0.2, + "learning_rate": 9.029499786233435e-05, + "loss": 0.5918, + "step": 4640 + }, + { + "epoch": 0.2, + "learning_rate": 9.027362120564344e-05, + "loss": 0.5836, + "step": 4650 + }, + { + "epoch": 0.2, + "learning_rate": 9.025224454895254e-05, + "loss": 0.6451, + "step": 4660 + }, + { + "epoch": 0.2, + "learning_rate": 9.023086789226165e-05, + "loss": 0.5762, + "step": 4670 + }, + { + "epoch": 0.2, + "learning_rate": 9.020949123557077e-05, + "loss": 0.6443, + "step": 4680 + }, + { + "epoch": 0.2, + "learning_rate": 9.018811457887987e-05, + "loss": 0.5904, + "step": 4690 + }, + { + "epoch": 0.2, + "learning_rate": 9.016673792218897e-05, + "loss": 0.5725, + "step": 4700 + }, + { + "epoch": 0.2, + "learning_rate": 9.014536126549808e-05, + "loss": 0.6333, + "step": 4710 + }, + { + "epoch": 0.2, + "learning_rate": 9.012398460880718e-05, + "loss": 0.5778, + "step": 4720 + }, + { + "epoch": 0.2, + "learning_rate": 9.010260795211629e-05, + "loss": 0.6443, + "step": 4730 + }, + { + "epoch": 0.2, + "learning_rate": 9.008123129542539e-05, + "loss": 0.5872, + "step": 4740 + }, + { + "epoch": 0.2, + "learning_rate": 9.005985463873451e-05, + "loss": 0.5788, + "step": 4750 + }, + { + "epoch": 0.2, + "learning_rate": 9.003847798204361e-05, + "loss": 0.6497, + "step": 4760 + }, + { + "epoch": 0.2, + "learning_rate": 9.001710132535272e-05, + "loss": 0.5853, + "step": 4770 + }, + { + "epoch": 0.2, + "learning_rate": 8.999572466866182e-05, + "loss": 0.6395, + "step": 4780 + }, + { + "epoch": 0.2, + "learning_rate": 8.997434801197093e-05, + "loss": 0.6032, + "step": 4790 + }, + { + "epoch": 0.2, + "learning_rate": 8.995297135528003e-05, + "loss": 0.5695, + "step": 4800 + }, + { + "epoch": 0.21, + "learning_rate": 8.993159469858915e-05, + "loss": 0.6521, + "step": 4810 + }, + { + "epoch": 0.21, + "learning_rate": 8.991021804189826e-05, + "loss": 0.5717, + "step": 4820 + }, + { + "epoch": 0.21, + "learning_rate": 8.988884138520736e-05, + "loss": 0.6481, + "step": 4830 + }, + { + "epoch": 0.21, + "learning_rate": 8.986746472851646e-05, + "loss": 0.5899, + "step": 4840 + }, + { + "epoch": 0.21, + "learning_rate": 8.984608807182557e-05, + "loss": 0.5897, + "step": 4850 + }, + { + "epoch": 0.21, + "learning_rate": 8.982471141513467e-05, + "loss": 0.641, + "step": 4860 + }, + { + "epoch": 0.21, + "learning_rate": 8.980333475844378e-05, + "loss": 0.5832, + "step": 4870 + }, + { + "epoch": 0.21, + "learning_rate": 8.97819581017529e-05, + "loss": 0.6438, + "step": 4880 + }, + { + "epoch": 0.21, + "learning_rate": 8.9760581445062e-05, + "loss": 0.5963, + "step": 4890 + }, + { + "epoch": 0.21, + "learning_rate": 8.97392047883711e-05, + "loss": 0.5721, + "step": 4900 + }, + { + "epoch": 0.21, + "learning_rate": 8.971782813168021e-05, + "loss": 0.6455, + "step": 4910 + }, + { + "epoch": 0.21, + "learning_rate": 8.969645147498931e-05, + "loss": 0.562, + "step": 4920 + }, + { + "epoch": 0.21, + "learning_rate": 8.967507481829842e-05, + "loss": 0.644, + "step": 4930 + }, + { + "epoch": 0.21, + "learning_rate": 8.965369816160752e-05, + "loss": 0.5944, + "step": 4940 + }, + { + "epoch": 0.21, + "learning_rate": 8.963232150491664e-05, + "loss": 0.5806, + "step": 4950 + }, + { + "epoch": 0.21, + "learning_rate": 8.961094484822575e-05, + "loss": 0.6485, + "step": 4960 + }, + { + "epoch": 0.21, + "learning_rate": 8.958956819153485e-05, + "loss": 0.585, + "step": 4970 + }, + { + "epoch": 0.21, + "learning_rate": 8.956819153484395e-05, + "loss": 0.6438, + "step": 4980 + }, + { + "epoch": 0.21, + "learning_rate": 8.954681487815306e-05, + "loss": 0.5973, + "step": 4990 + }, + { + "epoch": 0.21, + "learning_rate": 8.952543822146216e-05, + "loss": 0.5681, + "step": 5000 + }, + { + "epoch": 0.21, + "learning_rate": 8.950406156477127e-05, + "loss": 0.6313, + "step": 5010 + }, + { + "epoch": 0.21, + "learning_rate": 8.948268490808039e-05, + "loss": 0.5741, + "step": 5020 + }, + { + "epoch": 0.21, + "learning_rate": 8.946130825138949e-05, + "loss": 0.6429, + "step": 5030 + }, + { + "epoch": 0.22, + "learning_rate": 8.94399315946986e-05, + "loss": 0.5905, + "step": 5040 + }, + { + "epoch": 0.22, + "learning_rate": 8.94185549380077e-05, + "loss": 0.58, + "step": 5050 + }, + { + "epoch": 0.22, + "learning_rate": 8.93971782813168e-05, + "loss": 0.6431, + "step": 5060 + }, + { + "epoch": 0.22, + "learning_rate": 8.937580162462591e-05, + "loss": 0.5747, + "step": 5070 + }, + { + "epoch": 0.22, + "learning_rate": 8.935442496793503e-05, + "loss": 0.6399, + "step": 5080 + }, + { + "epoch": 0.22, + "learning_rate": 8.933304831124413e-05, + "loss": 0.5846, + "step": 5090 + }, + { + "epoch": 0.22, + "learning_rate": 8.931167165455324e-05, + "loss": 0.5798, + "step": 5100 + }, + { + "epoch": 0.22, + "learning_rate": 8.929029499786234e-05, + "loss": 0.6438, + "step": 5110 + }, + { + "epoch": 0.22, + "learning_rate": 8.926891834117144e-05, + "loss": 0.5852, + "step": 5120 + }, + { + "epoch": 0.22, + "learning_rate": 8.924754168448055e-05, + "loss": 0.6354, + "step": 5130 + }, + { + "epoch": 0.22, + "learning_rate": 8.922616502778965e-05, + "loss": 0.5967, + "step": 5140 + }, + { + "epoch": 0.22, + "learning_rate": 8.920478837109877e-05, + "loss": 0.5783, + "step": 5150 + }, + { + "epoch": 0.22, + "learning_rate": 8.918341171440788e-05, + "loss": 0.6457, + "step": 5160 + }, + { + "epoch": 0.22, + "learning_rate": 8.916203505771698e-05, + "loss": 0.5745, + "step": 5170 + }, + { + "epoch": 0.22, + "learning_rate": 8.914065840102609e-05, + "loss": 0.633, + "step": 5180 + }, + { + "epoch": 0.22, + "learning_rate": 8.911928174433519e-05, + "loss": 0.5989, + "step": 5190 + }, + { + "epoch": 0.22, + "learning_rate": 8.90979050876443e-05, + "loss": 0.5863, + "step": 5200 + }, + { + "epoch": 0.22, + "learning_rate": 8.90765284309534e-05, + "loss": 0.6467, + "step": 5210 + }, + { + "epoch": 0.22, + "learning_rate": 8.905515177426252e-05, + "loss": 0.5777, + "step": 5220 + }, + { + "epoch": 0.22, + "learning_rate": 8.903377511757162e-05, + "loss": 0.6356, + "step": 5230 + }, + { + "epoch": 0.22, + "learning_rate": 8.901239846088071e-05, + "loss": 0.5919, + "step": 5240 + }, + { + "epoch": 0.22, + "learning_rate": 8.899102180418983e-05, + "loss": 0.5756, + "step": 5250 + }, + { + "epoch": 0.22, + "learning_rate": 8.896964514749894e-05, + "loss": 0.6475, + "step": 5260 + }, + { + "epoch": 0.22, + "learning_rate": 8.894826849080804e-05, + "loss": 0.5882, + "step": 5270 + }, + { + "epoch": 0.23, + "learning_rate": 8.892689183411714e-05, + "loss": 0.6429, + "step": 5280 + }, + { + "epoch": 0.23, + "learning_rate": 8.890551517742626e-05, + "loss": 0.5866, + "step": 5290 + }, + { + "epoch": 0.23, + "learning_rate": 8.888413852073537e-05, + "loss": 0.5816, + "step": 5300 + }, + { + "epoch": 0.23, + "learning_rate": 8.886276186404446e-05, + "loss": 0.6504, + "step": 5310 + }, + { + "epoch": 0.23, + "learning_rate": 8.884138520735358e-05, + "loss": 0.5742, + "step": 5320 + }, + { + "epoch": 0.23, + "learning_rate": 8.882000855066268e-05, + "loss": 0.6353, + "step": 5330 + }, + { + "epoch": 0.23, + "learning_rate": 8.879863189397178e-05, + "loss": 0.585, + "step": 5340 + }, + { + "epoch": 0.23, + "learning_rate": 8.87772552372809e-05, + "loss": 0.5748, + "step": 5350 + }, + { + "epoch": 0.23, + "learning_rate": 8.875587858059001e-05, + "loss": 0.6476, + "step": 5360 + }, + { + "epoch": 0.23, + "learning_rate": 8.87345019238991e-05, + "loss": 0.579, + "step": 5370 + }, + { + "epoch": 0.23, + "learning_rate": 8.87131252672082e-05, + "loss": 0.6323, + "step": 5380 + }, + { + "epoch": 0.23, + "learning_rate": 8.869174861051732e-05, + "loss": 0.5885, + "step": 5390 + }, + { + "epoch": 0.23, + "learning_rate": 8.867037195382643e-05, + "loss": 0.563, + "step": 5400 + }, + { + "epoch": 0.23, + "learning_rate": 8.864899529713553e-05, + "loss": 0.6455, + "step": 5410 + }, + { + "epoch": 0.23, + "learning_rate": 8.862761864044465e-05, + "loss": 0.5843, + "step": 5420 + }, + { + "epoch": 0.23, + "learning_rate": 8.860624198375375e-05, + "loss": 0.6458, + "step": 5430 + }, + { + "epoch": 0.23, + "learning_rate": 8.858486532706284e-05, + "loss": 0.5807, + "step": 5440 + }, + { + "epoch": 0.23, + "learning_rate": 8.856348867037196e-05, + "loss": 0.5734, + "step": 5450 + }, + { + "epoch": 0.23, + "learning_rate": 8.854211201368107e-05, + "loss": 0.6472, + "step": 5460 + }, + { + "epoch": 0.23, + "learning_rate": 8.852073535699017e-05, + "loss": 0.5796, + "step": 5470 + }, + { + "epoch": 0.23, + "learning_rate": 8.849935870029928e-05, + "loss": 0.6374, + "step": 5480 + }, + { + "epoch": 0.23, + "learning_rate": 8.847798204360839e-05, + "loss": 0.5877, + "step": 5490 + }, + { + "epoch": 0.23, + "learning_rate": 8.845660538691748e-05, + "loss": 0.5767, + "step": 5500 + }, + { + "epoch": 0.24, + "learning_rate": 8.843522873022659e-05, + "loss": 0.6475, + "step": 5510 + }, + { + "epoch": 0.24, + "learning_rate": 8.84138520735357e-05, + "loss": 0.5748, + "step": 5520 + }, + { + "epoch": 0.24, + "learning_rate": 8.839247541684481e-05, + "loss": 0.628, + "step": 5530 + }, + { + "epoch": 0.24, + "learning_rate": 8.837109876015392e-05, + "loss": 0.5823, + "step": 5540 + }, + { + "epoch": 0.24, + "learning_rate": 8.834972210346302e-05, + "loss": 0.5658, + "step": 5550 + }, + { + "epoch": 0.24, + "learning_rate": 8.832834544677214e-05, + "loss": 0.6425, + "step": 5560 + }, + { + "epoch": 0.24, + "learning_rate": 8.830696879008123e-05, + "loss": 0.5654, + "step": 5570 + }, + { + "epoch": 0.24, + "learning_rate": 8.828559213339033e-05, + "loss": 0.6343, + "step": 5580 + }, + { + "epoch": 0.24, + "learning_rate": 8.826421547669945e-05, + "loss": 0.5908, + "step": 5590 + }, + { + "epoch": 0.24, + "learning_rate": 8.824283882000856e-05, + "loss": 0.5664, + "step": 5600 + }, + { + "epoch": 0.24, + "learning_rate": 8.822146216331766e-05, + "loss": 0.6408, + "step": 5610 + }, + { + "epoch": 0.24, + "learning_rate": 8.820008550662678e-05, + "loss": 0.5767, + "step": 5620 + }, + { + "epoch": 0.24, + "learning_rate": 8.817870884993587e-05, + "loss": 0.6289, + "step": 5630 + }, + { + "epoch": 0.24, + "learning_rate": 8.815733219324497e-05, + "loss": 0.5879, + "step": 5640 + }, + { + "epoch": 0.24, + "learning_rate": 8.813595553655408e-05, + "loss": 0.5727, + "step": 5650 + }, + { + "epoch": 0.24, + "learning_rate": 8.81145788798632e-05, + "loss": 0.6488, + "step": 5660 + }, + { + "epoch": 0.24, + "learning_rate": 8.80932022231723e-05, + "loss": 0.5742, + "step": 5670 + }, + { + "epoch": 0.24, + "learning_rate": 8.80718255664814e-05, + "loss": 0.6282, + "step": 5680 + }, + { + "epoch": 0.24, + "learning_rate": 8.805044890979051e-05, + "loss": 0.5916, + "step": 5690 + }, + { + "epoch": 0.24, + "learning_rate": 8.802907225309962e-05, + "loss": 0.5768, + "step": 5700 + }, + { + "epoch": 0.24, + "learning_rate": 8.800769559640872e-05, + "loss": 0.6477, + "step": 5710 + }, + { + "epoch": 0.24, + "learning_rate": 8.798631893971784e-05, + "loss": 0.5747, + "step": 5720 + }, + { + "epoch": 0.24, + "learning_rate": 8.796494228302694e-05, + "loss": 0.6366, + "step": 5730 + }, + { + "epoch": 0.24, + "learning_rate": 8.794356562633605e-05, + "loss": 0.5956, + "step": 5740 + }, + { + "epoch": 0.25, + "learning_rate": 8.792218896964515e-05, + "loss": 0.5823, + "step": 5750 + }, + { + "epoch": 0.25, + "learning_rate": 8.790081231295426e-05, + "loss": 0.6509, + "step": 5760 + }, + { + "epoch": 0.25, + "learning_rate": 8.787943565626336e-05, + "loss": 0.577, + "step": 5770 + }, + { + "epoch": 0.25, + "learning_rate": 8.785805899957246e-05, + "loss": 0.6375, + "step": 5780 + }, + { + "epoch": 0.25, + "learning_rate": 8.783668234288158e-05, + "loss": 0.5858, + "step": 5790 + }, + { + "epoch": 0.25, + "learning_rate": 8.781530568619069e-05, + "loss": 0.5803, + "step": 5800 + }, + { + "epoch": 0.25, + "learning_rate": 8.779392902949979e-05, + "loss": 0.642, + "step": 5810 + }, + { + "epoch": 0.25, + "learning_rate": 8.77725523728089e-05, + "loss": 0.5635, + "step": 5820 + }, + { + "epoch": 0.25, + "learning_rate": 8.7751175716118e-05, + "loss": 0.6303, + "step": 5830 + }, + { + "epoch": 0.25, + "learning_rate": 8.77297990594271e-05, + "loss": 0.5854, + "step": 5840 + }, + { + "epoch": 0.25, + "learning_rate": 8.770842240273621e-05, + "loss": 0.5705, + "step": 5850 + }, + { + "epoch": 0.25, + "learning_rate": 8.768704574604533e-05, + "loss": 0.6394, + "step": 5860 + }, + { + "epoch": 0.25, + "learning_rate": 8.766566908935443e-05, + "loss": 0.5694, + "step": 5870 + }, + { + "epoch": 0.25, + "learning_rate": 8.764429243266354e-05, + "loss": 0.6324, + "step": 5880 + }, + { + "epoch": 0.25, + "learning_rate": 8.762291577597264e-05, + "loss": 0.5907, + "step": 5890 + }, + { + "epoch": 0.25, + "learning_rate": 8.760153911928175e-05, + "loss": 0.582, + "step": 5900 + }, + { + "epoch": 0.25, + "learning_rate": 8.758016246259085e-05, + "loss": 0.6426, + "step": 5910 + }, + { + "epoch": 0.25, + "learning_rate": 8.755878580589995e-05, + "loss": 0.5812, + "step": 5920 + }, + { + "epoch": 0.25, + "learning_rate": 8.753740914920907e-05, + "loss": 0.6322, + "step": 5930 + }, + { + "epoch": 0.25, + "learning_rate": 8.751603249251818e-05, + "loss": 0.5888, + "step": 5940 + }, + { + "epoch": 0.25, + "learning_rate": 8.749465583582728e-05, + "loss": 0.5756, + "step": 5950 + }, + { + "epoch": 0.25, + "learning_rate": 8.747327917913639e-05, + "loss": 0.6421, + "step": 5960 + }, + { + "epoch": 0.25, + "learning_rate": 8.745190252244549e-05, + "loss": 0.5776, + "step": 5970 + }, + { + "epoch": 0.26, + "learning_rate": 8.74305258657546e-05, + "loss": 0.6281, + "step": 5980 + }, + { + "epoch": 0.26, + "learning_rate": 8.740914920906371e-05, + "loss": 0.5991, + "step": 5990 + }, + { + "epoch": 0.26, + "learning_rate": 8.738777255237282e-05, + "loss": 0.5764, + "step": 6000 + }, + { + "epoch": 0.26, + "learning_rate": 8.736639589568192e-05, + "loss": 0.6396, + "step": 6010 + }, + { + "epoch": 0.26, + "learning_rate": 8.734501923899103e-05, + "loss": 0.5759, + "step": 6020 + }, + { + "epoch": 0.26, + "learning_rate": 8.732364258230013e-05, + "loss": 0.6337, + "step": 6030 + }, + { + "epoch": 0.26, + "learning_rate": 8.730226592560924e-05, + "loss": 0.5965, + "step": 6040 + }, + { + "epoch": 0.26, + "learning_rate": 8.728088926891834e-05, + "loss": 0.5752, + "step": 6050 + }, + { + "epoch": 0.26, + "learning_rate": 8.725951261222746e-05, + "loss": 0.6431, + "step": 6060 + }, + { + "epoch": 0.26, + "learning_rate": 8.723813595553656e-05, + "loss": 0.564, + "step": 6070 + }, + { + "epoch": 0.26, + "learning_rate": 8.721675929884567e-05, + "loss": 0.6388, + "step": 6080 + }, + { + "epoch": 0.26, + "learning_rate": 8.719538264215477e-05, + "loss": 0.592, + "step": 6090 + }, + { + "epoch": 0.26, + "learning_rate": 8.717400598546388e-05, + "loss": 0.5683, + "step": 6100 + }, + { + "epoch": 0.26, + "learning_rate": 8.715262932877298e-05, + "loss": 0.6348, + "step": 6110 + }, + { + "epoch": 0.26, + "learning_rate": 8.713125267208209e-05, + "loss": 0.5744, + "step": 6120 + }, + { + "epoch": 0.26, + "learning_rate": 8.71098760153912e-05, + "loss": 0.6318, + "step": 6130 + }, + { + "epoch": 0.26, + "learning_rate": 8.708849935870031e-05, + "loss": 0.5927, + "step": 6140 + }, + { + "epoch": 0.26, + "learning_rate": 8.706712270200941e-05, + "loss": 0.5735, + "step": 6150 + }, + { + "epoch": 0.26, + "learning_rate": 8.704574604531852e-05, + "loss": 0.643, + "step": 6160 + }, + { + "epoch": 0.26, + "learning_rate": 8.702436938862762e-05, + "loss": 0.5788, + "step": 6170 + }, + { + "epoch": 0.26, + "learning_rate": 8.700299273193673e-05, + "loss": 0.6335, + "step": 6180 + }, + { + "epoch": 0.26, + "learning_rate": 8.698161607524583e-05, + "loss": 0.5813, + "step": 6190 + }, + { + "epoch": 0.26, + "learning_rate": 8.696023941855495e-05, + "loss": 0.5684, + "step": 6200 + }, + { + "epoch": 0.26, + "learning_rate": 8.693886276186405e-05, + "loss": 0.631, + "step": 6210 + }, + { + "epoch": 0.27, + "learning_rate": 8.691748610517316e-05, + "loss": 0.5674, + "step": 6220 + }, + { + "epoch": 0.27, + "learning_rate": 8.689610944848226e-05, + "loss": 0.6316, + "step": 6230 + }, + { + "epoch": 0.27, + "learning_rate": 8.687473279179137e-05, + "loss": 0.5891, + "step": 6240 + }, + { + "epoch": 0.27, + "learning_rate": 8.685335613510047e-05, + "loss": 0.5761, + "step": 6250 + }, + { + "epoch": 0.27, + "learning_rate": 8.683197947840959e-05, + "loss": 0.6419, + "step": 6260 + }, + { + "epoch": 0.27, + "learning_rate": 8.68106028217187e-05, + "loss": 0.5742, + "step": 6270 + }, + { + "epoch": 0.27, + "learning_rate": 8.67892261650278e-05, + "loss": 0.6342, + "step": 6280 + }, + { + "epoch": 0.27, + "learning_rate": 8.676784950833689e-05, + "loss": 0.5912, + "step": 6290 + }, + { + "epoch": 0.27, + "learning_rate": 8.674647285164601e-05, + "loss": 0.5759, + "step": 6300 + }, + { + "epoch": 0.27, + "learning_rate": 8.672509619495511e-05, + "loss": 0.6286, + "step": 6310 + }, + { + "epoch": 0.27, + "learning_rate": 8.670371953826422e-05, + "loss": 0.5775, + "step": 6320 + }, + { + "epoch": 0.27, + "learning_rate": 8.668234288157333e-05, + "loss": 0.6346, + "step": 6330 + }, + { + "epoch": 0.27, + "learning_rate": 8.666096622488244e-05, + "loss": 0.5801, + "step": 6340 + }, + { + "epoch": 0.27, + "learning_rate": 8.663958956819154e-05, + "loss": 0.5638, + "step": 6350 + }, + { + "epoch": 0.27, + "learning_rate": 8.661821291150063e-05, + "loss": 0.642, + "step": 6360 + }, + { + "epoch": 0.27, + "learning_rate": 8.659683625480975e-05, + "loss": 0.5698, + "step": 6370 + }, + { + "epoch": 0.27, + "learning_rate": 8.657545959811886e-05, + "loss": 0.6293, + "step": 6380 + }, + { + "epoch": 0.27, + "learning_rate": 8.655408294142796e-05, + "loss": 0.5844, + "step": 6390 + }, + { + "epoch": 0.27, + "learning_rate": 8.653270628473708e-05, + "loss": 0.5729, + "step": 6400 + }, + { + "epoch": 0.27, + "learning_rate": 8.651132962804618e-05, + "loss": 0.6443, + "step": 6410 + }, + { + "epoch": 0.27, + "learning_rate": 8.648995297135528e-05, + "loss": 0.5785, + "step": 6420 + }, + { + "epoch": 0.27, + "learning_rate": 8.64685763146644e-05, + "loss": 0.6354, + "step": 6430 + }, + { + "epoch": 0.27, + "learning_rate": 8.64471996579735e-05, + "loss": 0.5819, + "step": 6440 + }, + { + "epoch": 0.28, + "learning_rate": 8.64258230012826e-05, + "loss": 0.5606, + "step": 6450 + }, + { + "epoch": 0.28, + "learning_rate": 8.640444634459171e-05, + "loss": 0.6505, + "step": 6460 + }, + { + "epoch": 0.28, + "learning_rate": 8.638306968790082e-05, + "loss": 0.5764, + "step": 6470 + }, + { + "epoch": 0.28, + "learning_rate": 8.636169303120992e-05, + "loss": 0.6339, + "step": 6480 + }, + { + "epoch": 0.28, + "learning_rate": 8.634031637451902e-05, + "loss": 0.5911, + "step": 6490 + }, + { + "epoch": 0.28, + "learning_rate": 8.631893971782814e-05, + "loss": 0.5716, + "step": 6500 + }, + { + "epoch": 0.28, + "learning_rate": 8.629756306113724e-05, + "loss": 0.6489, + "step": 6510 + }, + { + "epoch": 0.28, + "learning_rate": 8.627618640444635e-05, + "loss": 0.5706, + "step": 6520 + }, + { + "epoch": 0.28, + "learning_rate": 8.625480974775547e-05, + "loss": 0.6311, + "step": 6530 + }, + { + "epoch": 0.28, + "learning_rate": 8.623343309106457e-05, + "loss": 0.59, + "step": 6540 + }, + { + "epoch": 0.28, + "learning_rate": 8.621205643437366e-05, + "loss": 0.5642, + "step": 6550 + }, + { + "epoch": 0.28, + "learning_rate": 8.619067977768277e-05, + "loss": 0.6452, + "step": 6560 + }, + { + "epoch": 0.28, + "learning_rate": 8.616930312099188e-05, + "loss": 0.5715, + "step": 6570 + }, + { + "epoch": 0.28, + "learning_rate": 8.614792646430099e-05, + "loss": 0.6174, + "step": 6580 + }, + { + "epoch": 0.28, + "learning_rate": 8.612654980761009e-05, + "loss": 0.5702, + "step": 6590 + }, + { + "epoch": 0.28, + "learning_rate": 8.610517315091921e-05, + "loss": 0.5677, + "step": 6600 + }, + { + "epoch": 0.28, + "learning_rate": 8.60837964942283e-05, + "loss": 0.6382, + "step": 6610 + }, + { + "epoch": 0.28, + "learning_rate": 8.60624198375374e-05, + "loss": 0.5763, + "step": 6620 + }, + { + "epoch": 0.28, + "learning_rate": 8.604104318084651e-05, + "loss": 0.6281, + "step": 6630 + }, + { + "epoch": 0.28, + "learning_rate": 8.601966652415563e-05, + "loss": 0.5864, + "step": 6640 + }, + { + "epoch": 0.28, + "learning_rate": 8.599828986746473e-05, + "loss": 0.5705, + "step": 6650 + }, + { + "epoch": 0.28, + "learning_rate": 8.597691321077384e-05, + "loss": 0.6454, + "step": 6660 + }, + { + "epoch": 0.28, + "learning_rate": 8.595553655408296e-05, + "loss": 0.5756, + "step": 6670 + }, + { + "epoch": 0.28, + "learning_rate": 8.593415989739205e-05, + "loss": 0.6367, + "step": 6680 + }, + { + "epoch": 0.29, + "learning_rate": 8.591278324070115e-05, + "loss": 0.5747, + "step": 6690 + }, + { + "epoch": 0.29, + "learning_rate": 8.589140658401027e-05, + "loss": 0.5756, + "step": 6700 + }, + { + "epoch": 0.29, + "learning_rate": 8.587002992731937e-05, + "loss": 0.6373, + "step": 6710 + }, + { + "epoch": 0.29, + "learning_rate": 8.584865327062848e-05, + "loss": 0.5703, + "step": 6720 + }, + { + "epoch": 0.29, + "learning_rate": 8.582727661393758e-05, + "loss": 0.6384, + "step": 6730 + }, + { + "epoch": 0.29, + "learning_rate": 8.580589995724669e-05, + "loss": 0.5776, + "step": 6740 + }, + { + "epoch": 0.29, + "learning_rate": 8.578452330055579e-05, + "loss": 0.5724, + "step": 6750 + }, + { + "epoch": 0.29, + "learning_rate": 8.57631466438649e-05, + "loss": 0.6356, + "step": 6760 + }, + { + "epoch": 0.29, + "learning_rate": 8.574176998717401e-05, + "loss": 0.5704, + "step": 6770 + }, + { + "epoch": 0.29, + "learning_rate": 8.572039333048312e-05, + "loss": 0.6317, + "step": 6780 + }, + { + "epoch": 0.29, + "learning_rate": 8.569901667379222e-05, + "loss": 0.5927, + "step": 6790 + }, + { + "epoch": 0.29, + "learning_rate": 8.567764001710134e-05, + "loss": 0.5644, + "step": 6800 + }, + { + "epoch": 0.29, + "learning_rate": 8.565626336041043e-05, + "loss": 0.647, + "step": 6810 + }, + { + "epoch": 0.29, + "learning_rate": 8.563488670371954e-05, + "loss": 0.5739, + "step": 6820 + }, + { + "epoch": 0.29, + "learning_rate": 8.561351004702864e-05, + "loss": 0.6314, + "step": 6830 + }, + { + "epoch": 0.29, + "learning_rate": 8.559213339033776e-05, + "loss": 0.5791, + "step": 6840 + }, + { + "epoch": 0.29, + "learning_rate": 8.557075673364686e-05, + "loss": 0.5664, + "step": 6850 + }, + { + "epoch": 0.29, + "learning_rate": 8.554938007695597e-05, + "loss": 0.6443, + "step": 6860 + }, + { + "epoch": 0.29, + "learning_rate": 8.552800342026507e-05, + "loss": 0.5794, + "step": 6870 + }, + { + "epoch": 0.29, + "learning_rate": 8.550662676357418e-05, + "loss": 0.6261, + "step": 6880 + }, + { + "epoch": 0.29, + "learning_rate": 8.548525010688328e-05, + "loss": 0.5711, + "step": 6890 + }, + { + "epoch": 0.29, + "learning_rate": 8.546387345019239e-05, + "loss": 0.5646, + "step": 6900 + }, + { + "epoch": 0.29, + "learning_rate": 8.54424967935015e-05, + "loss": 0.6257, + "step": 6910 + }, + { + "epoch": 0.3, + "learning_rate": 8.542112013681061e-05, + "loss": 0.5686, + "step": 6920 + }, + { + "epoch": 0.3, + "learning_rate": 8.539974348011971e-05, + "loss": 0.6298, + "step": 6930 + }, + { + "epoch": 0.3, + "learning_rate": 8.537836682342882e-05, + "loss": 0.5772, + "step": 6940 + }, + { + "epoch": 0.3, + "learning_rate": 8.535699016673792e-05, + "loss": 0.5551, + "step": 6950 + }, + { + "epoch": 0.3, + "learning_rate": 8.533561351004703e-05, + "loss": 0.6515, + "step": 6960 + }, + { + "epoch": 0.3, + "learning_rate": 8.531423685335615e-05, + "loss": 0.5604, + "step": 6970 + }, + { + "epoch": 0.3, + "learning_rate": 8.529286019666525e-05, + "loss": 0.6233, + "step": 6980 + }, + { + "epoch": 0.3, + "learning_rate": 8.527148353997435e-05, + "loss": 0.5859, + "step": 6990 + }, + { + "epoch": 0.3, + "learning_rate": 8.525010688328346e-05, + "loss": 0.5653, + "step": 7000 + }, + { + "epoch": 0.3, + "learning_rate": 8.522873022659256e-05, + "loss": 0.6506, + "step": 7010 + }, + { + "epoch": 0.3, + "learning_rate": 8.520735356990167e-05, + "loss": 0.5721, + "step": 7020 + }, + { + "epoch": 0.3, + "learning_rate": 8.518597691321077e-05, + "loss": 0.6293, + "step": 7030 + }, + { + "epoch": 0.3, + "learning_rate": 8.516460025651989e-05, + "loss": 0.5837, + "step": 7040 + }, + { + "epoch": 0.3, + "learning_rate": 8.5143223599829e-05, + "loss": 0.5629, + "step": 7050 + }, + { + "epoch": 0.3, + "learning_rate": 8.51218469431381e-05, + "loss": 0.6381, + "step": 7060 + }, + { + "epoch": 0.3, + "learning_rate": 8.51004702864472e-05, + "loss": 0.5668, + "step": 7070 + }, + { + "epoch": 0.3, + "learning_rate": 8.507909362975631e-05, + "loss": 0.6315, + "step": 7080 + }, + { + "epoch": 0.3, + "learning_rate": 8.505771697306541e-05, + "loss": 0.5883, + "step": 7090 + }, + { + "epoch": 0.3, + "learning_rate": 8.503634031637452e-05, + "loss": 0.5641, + "step": 7100 + }, + { + "epoch": 0.3, + "learning_rate": 8.501496365968364e-05, + "loss": 0.6337, + "step": 7110 + }, + { + "epoch": 0.3, + "learning_rate": 8.499358700299274e-05, + "loss": 0.5743, + "step": 7120 + }, + { + "epoch": 0.3, + "learning_rate": 8.497221034630184e-05, + "loss": 0.6268, + "step": 7130 + }, + { + "epoch": 0.3, + "learning_rate": 8.495083368961095e-05, + "loss": 0.5807, + "step": 7140 + }, + { + "epoch": 0.31, + "learning_rate": 8.492945703292005e-05, + "loss": 0.566, + "step": 7150 + }, + { + "epoch": 0.31, + "learning_rate": 8.490808037622916e-05, + "loss": 0.6325, + "step": 7160 + }, + { + "epoch": 0.31, + "learning_rate": 8.488670371953826e-05, + "loss": 0.5665, + "step": 7170 + }, + { + "epoch": 0.31, + "learning_rate": 8.486532706284738e-05, + "loss": 0.6362, + "step": 7180 + }, + { + "epoch": 0.31, + "learning_rate": 8.484395040615649e-05, + "loss": 0.5832, + "step": 7190 + }, + { + "epoch": 0.31, + "learning_rate": 8.482257374946559e-05, + "loss": 0.5689, + "step": 7200 + }, + { + "epoch": 0.31, + "learning_rate": 8.48011970927747e-05, + "loss": 0.6282, + "step": 7210 + }, + { + "epoch": 0.31, + "learning_rate": 8.47798204360838e-05, + "loss": 0.5712, + "step": 7220 + }, + { + "epoch": 0.31, + "learning_rate": 8.47584437793929e-05, + "loss": 0.626, + "step": 7230 + }, + { + "epoch": 0.31, + "learning_rate": 8.473706712270202e-05, + "loss": 0.5838, + "step": 7240 + }, + { + "epoch": 0.31, + "learning_rate": 8.471569046601113e-05, + "loss": 0.5713, + "step": 7250 + }, + { + "epoch": 0.31, + "learning_rate": 8.469431380932023e-05, + "loss": 0.6296, + "step": 7260 + }, + { + "epoch": 0.31, + "learning_rate": 8.467293715262932e-05, + "loss": 0.5758, + "step": 7270 + }, + { + "epoch": 0.31, + "learning_rate": 8.465156049593844e-05, + "loss": 0.6305, + "step": 7280 + }, + { + "epoch": 0.31, + "learning_rate": 8.463018383924754e-05, + "loss": 0.5794, + "step": 7290 + }, + { + "epoch": 0.31, + "learning_rate": 8.460880718255665e-05, + "loss": 0.5734, + "step": 7300 + }, + { + "epoch": 0.31, + "learning_rate": 8.458743052586577e-05, + "loss": 0.6443, + "step": 7310 + }, + { + "epoch": 0.31, + "learning_rate": 8.456605386917487e-05, + "loss": 0.5678, + "step": 7320 + }, + { + "epoch": 0.31, + "learning_rate": 8.454467721248398e-05, + "loss": 0.6281, + "step": 7330 + }, + { + "epoch": 0.31, + "learning_rate": 8.452330055579308e-05, + "loss": 0.5862, + "step": 7340 + }, + { + "epoch": 0.31, + "learning_rate": 8.450192389910218e-05, + "loss": 0.566, + "step": 7350 + }, + { + "epoch": 0.31, + "learning_rate": 8.448054724241129e-05, + "loss": 0.6405, + "step": 7360 + }, + { + "epoch": 0.31, + "learning_rate": 8.44591705857204e-05, + "loss": 0.5694, + "step": 7370 + }, + { + "epoch": 0.31, + "learning_rate": 8.443779392902951e-05, + "loss": 0.6264, + "step": 7380 + }, + { + "epoch": 0.32, + "learning_rate": 8.441641727233862e-05, + "loss": 0.5824, + "step": 7390 + }, + { + "epoch": 0.32, + "learning_rate": 8.439504061564771e-05, + "loss": 0.5669, + "step": 7400 + }, + { + "epoch": 0.32, + "learning_rate": 8.437366395895683e-05, + "loss": 0.6339, + "step": 7410 + }, + { + "epoch": 0.32, + "learning_rate": 8.435228730226593e-05, + "loss": 0.5642, + "step": 7420 + }, + { + "epoch": 0.32, + "learning_rate": 8.433091064557503e-05, + "loss": 0.6182, + "step": 7430 + }, + { + "epoch": 0.32, + "learning_rate": 8.430953398888414e-05, + "loss": 0.5776, + "step": 7440 + }, + { + "epoch": 0.32, + "learning_rate": 8.428815733219326e-05, + "loss": 0.5681, + "step": 7450 + }, + { + "epoch": 0.32, + "learning_rate": 8.426678067550236e-05, + "loss": 0.6213, + "step": 7460 + }, + { + "epoch": 0.32, + "learning_rate": 8.424540401881145e-05, + "loss": 0.5612, + "step": 7470 + }, + { + "epoch": 0.32, + "learning_rate": 8.422402736212057e-05, + "loss": 0.6248, + "step": 7480 + }, + { + "epoch": 0.32, + "learning_rate": 8.420265070542967e-05, + "loss": 0.5795, + "step": 7490 + }, + { + "epoch": 0.32, + "learning_rate": 8.418127404873878e-05, + "loss": 0.56, + "step": 7500 + }, + { + "epoch": 0.32, + "learning_rate": 8.41598973920479e-05, + "loss": 0.6377, + "step": 7510 + }, + { + "epoch": 0.32, + "learning_rate": 8.4138520735357e-05, + "loss": 0.5756, + "step": 7520 + }, + { + "epoch": 0.32, + "learning_rate": 8.411714407866609e-05, + "loss": 0.6235, + "step": 7530 + }, + { + "epoch": 0.32, + "learning_rate": 8.40957674219752e-05, + "loss": 0.5808, + "step": 7540 + }, + { + "epoch": 0.32, + "learning_rate": 8.407439076528432e-05, + "loss": 0.571, + "step": 7550 + }, + { + "epoch": 0.32, + "learning_rate": 8.405301410859342e-05, + "loss": 0.6342, + "step": 7560 + }, + { + "epoch": 0.32, + "learning_rate": 8.403163745190252e-05, + "loss": 0.5678, + "step": 7570 + }, + { + "epoch": 0.32, + "learning_rate": 8.401026079521164e-05, + "loss": 0.6195, + "step": 7580 + }, + { + "epoch": 0.32, + "learning_rate": 8.398888413852075e-05, + "loss": 0.5756, + "step": 7590 + }, + { + "epoch": 0.32, + "learning_rate": 8.396750748182984e-05, + "loss": 0.5772, + "step": 7600 + }, + { + "epoch": 0.32, + "learning_rate": 8.394613082513896e-05, + "loss": 0.6286, + "step": 7610 + }, + { + "epoch": 0.33, + "learning_rate": 8.392475416844806e-05, + "loss": 0.5622, + "step": 7620 + }, + { + "epoch": 0.33, + "learning_rate": 8.390337751175717e-05, + "loss": 0.627, + "step": 7630 + }, + { + "epoch": 0.33, + "learning_rate": 8.388200085506627e-05, + "loss": 0.5709, + "step": 7640 + }, + { + "epoch": 0.33, + "learning_rate": 8.386062419837539e-05, + "loss": 0.5667, + "step": 7650 + }, + { + "epoch": 0.33, + "learning_rate": 8.383924754168448e-05, + "loss": 0.6382, + "step": 7660 + }, + { + "epoch": 0.33, + "learning_rate": 8.381787088499358e-05, + "loss": 0.572, + "step": 7670 + }, + { + "epoch": 0.33, + "learning_rate": 8.37964942283027e-05, + "loss": 0.6292, + "step": 7680 + }, + { + "epoch": 0.33, + "learning_rate": 8.37751175716118e-05, + "loss": 0.5808, + "step": 7690 + }, + { + "epoch": 0.33, + "learning_rate": 8.375374091492091e-05, + "loss": 0.5674, + "step": 7700 + }, + { + "epoch": 0.33, + "learning_rate": 8.373236425823001e-05, + "loss": 0.6286, + "step": 7710 + }, + { + "epoch": 0.33, + "learning_rate": 8.371098760153912e-05, + "loss": 0.5692, + "step": 7720 + }, + { + "epoch": 0.33, + "learning_rate": 8.368961094484822e-05, + "loss": 0.6254, + "step": 7730 + }, + { + "epoch": 0.33, + "learning_rate": 8.366823428815733e-05, + "loss": 0.5832, + "step": 7740 + }, + { + "epoch": 0.33, + "learning_rate": 8.364685763146645e-05, + "loss": 0.565, + "step": 7750 + }, + { + "epoch": 0.33, + "learning_rate": 8.362548097477555e-05, + "loss": 0.6298, + "step": 7760 + }, + { + "epoch": 0.33, + "learning_rate": 8.360410431808466e-05, + "loss": 0.567, + "step": 7770 + }, + { + "epoch": 0.33, + "learning_rate": 8.358272766139377e-05, + "loss": 0.6213, + "step": 7780 + }, + { + "epoch": 0.33, + "learning_rate": 8.356135100470286e-05, + "loss": 0.574, + "step": 7790 + }, + { + "epoch": 0.33, + "learning_rate": 8.353997434801197e-05, + "loss": 0.5699, + "step": 7800 + }, + { + "epoch": 0.33, + "learning_rate": 8.351859769132107e-05, + "loss": 0.6268, + "step": 7810 + }, + { + "epoch": 0.33, + "learning_rate": 8.349722103463019e-05, + "loss": 0.5742, + "step": 7820 + }, + { + "epoch": 0.33, + "learning_rate": 8.34758443779393e-05, + "loss": 0.6264, + "step": 7830 + }, + { + "epoch": 0.33, + "learning_rate": 8.34544677212484e-05, + "loss": 0.5777, + "step": 7840 + }, + { + "epoch": 0.33, + "learning_rate": 8.34330910645575e-05, + "loss": 0.5623, + "step": 7850 + }, + { + "epoch": 0.34, + "learning_rate": 8.341171440786661e-05, + "loss": 0.6235, + "step": 7860 + }, + { + "epoch": 0.34, + "learning_rate": 8.339033775117571e-05, + "loss": 0.5674, + "step": 7870 + }, + { + "epoch": 0.34, + "learning_rate": 8.336896109448483e-05, + "loss": 0.6271, + "step": 7880 + }, + { + "epoch": 0.34, + "learning_rate": 8.334758443779394e-05, + "loss": 0.5822, + "step": 7890 + }, + { + "epoch": 0.34, + "learning_rate": 8.332620778110304e-05, + "loss": 0.5667, + "step": 7900 + }, + { + "epoch": 0.34, + "learning_rate": 8.330483112441215e-05, + "loss": 0.6348, + "step": 7910 + }, + { + "epoch": 0.34, + "learning_rate": 8.328345446772125e-05, + "loss": 0.5647, + "step": 7920 + }, + { + "epoch": 0.34, + "learning_rate": 8.326207781103035e-05, + "loss": 0.6239, + "step": 7930 + }, + { + "epoch": 0.34, + "learning_rate": 8.324070115433946e-05, + "loss": 0.5812, + "step": 7940 + }, + { + "epoch": 0.34, + "learning_rate": 8.321932449764858e-05, + "loss": 0.5693, + "step": 7950 + }, + { + "epoch": 0.34, + "learning_rate": 8.319794784095768e-05, + "loss": 0.6279, + "step": 7960 + }, + { + "epoch": 0.34, + "learning_rate": 8.317657118426679e-05, + "loss": 0.5656, + "step": 7970 + }, + { + "epoch": 0.34, + "learning_rate": 8.315519452757589e-05, + "loss": 0.6269, + "step": 7980 + }, + { + "epoch": 0.34, + "learning_rate": 8.3133817870885e-05, + "loss": 0.5905, + "step": 7990 + }, + { + "epoch": 0.34, + "learning_rate": 8.31124412141941e-05, + "loss": 0.5605, + "step": 8000 + }, + { + "epoch": 0.34, + "learning_rate": 8.30910645575032e-05, + "loss": 0.636, + "step": 8010 + }, + { + "epoch": 0.34, + "learning_rate": 8.306968790081232e-05, + "loss": 0.5722, + "step": 8020 + }, + { + "epoch": 0.34, + "learning_rate": 8.304831124412143e-05, + "loss": 0.6249, + "step": 8030 + }, + { + "epoch": 0.34, + "learning_rate": 8.302693458743053e-05, + "loss": 0.5768, + "step": 8040 + }, + { + "epoch": 0.34, + "learning_rate": 8.300555793073964e-05, + "loss": 0.5677, + "step": 8050 + }, + { + "epoch": 0.34, + "learning_rate": 8.298418127404874e-05, + "loss": 0.6426, + "step": 8060 + }, + { + "epoch": 0.34, + "learning_rate": 8.296280461735784e-05, + "loss": 0.5684, + "step": 8070 + }, + { + "epoch": 0.34, + "learning_rate": 8.294142796066695e-05, + "loss": 0.6204, + "step": 8080 + }, + { + "epoch": 0.35, + "learning_rate": 8.292005130397607e-05, + "loss": 0.5883, + "step": 8090 + }, + { + "epoch": 0.35, + "learning_rate": 8.289867464728517e-05, + "loss": 0.5655, + "step": 8100 + }, + { + "epoch": 0.35, + "learning_rate": 8.287729799059428e-05, + "loss": 0.6434, + "step": 8110 + }, + { + "epoch": 0.35, + "learning_rate": 8.285592133390338e-05, + "loss": 0.5635, + "step": 8120 + }, + { + "epoch": 0.35, + "learning_rate": 8.283454467721249e-05, + "loss": 0.6223, + "step": 8130 + }, + { + "epoch": 0.35, + "learning_rate": 8.281316802052159e-05, + "loss": 0.5878, + "step": 8140 + }, + { + "epoch": 0.35, + "learning_rate": 8.279179136383071e-05, + "loss": 0.5602, + "step": 8150 + }, + { + "epoch": 0.35, + "learning_rate": 8.277041470713981e-05, + "loss": 0.6272, + "step": 8160 + }, + { + "epoch": 0.35, + "learning_rate": 8.274903805044892e-05, + "loss": 0.5667, + "step": 8170 + }, + { + "epoch": 0.35, + "learning_rate": 8.272766139375802e-05, + "loss": 0.6361, + "step": 8180 + }, + { + "epoch": 0.35, + "learning_rate": 8.270628473706713e-05, + "loss": 0.571, + "step": 8190 + }, + { + "epoch": 0.35, + "learning_rate": 8.268490808037623e-05, + "loss": 0.5656, + "step": 8200 + }, + { + "epoch": 0.35, + "learning_rate": 8.266353142368534e-05, + "loss": 0.6388, + "step": 8210 + }, + { + "epoch": 0.35, + "learning_rate": 8.264215476699445e-05, + "loss": 0.5653, + "step": 8220 + }, + { + "epoch": 0.35, + "learning_rate": 8.262077811030356e-05, + "loss": 0.6226, + "step": 8230 + }, + { + "epoch": 0.35, + "learning_rate": 8.259940145361266e-05, + "loss": 0.5663, + "step": 8240 + }, + { + "epoch": 0.35, + "learning_rate": 8.257802479692177e-05, + "loss": 0.5657, + "step": 8250 + }, + { + "epoch": 0.35, + "learning_rate": 8.255664814023087e-05, + "loss": 0.6299, + "step": 8260 + }, + { + "epoch": 0.35, + "learning_rate": 8.253527148353998e-05, + "loss": 0.5626, + "step": 8270 + }, + { + "epoch": 0.35, + "learning_rate": 8.251389482684908e-05, + "loss": 0.628, + "step": 8280 + }, + { + "epoch": 0.35, + "learning_rate": 8.24925181701582e-05, + "loss": 0.5787, + "step": 8290 + }, + { + "epoch": 0.35, + "learning_rate": 8.24711415134673e-05, + "loss": 0.5607, + "step": 8300 + }, + { + "epoch": 0.35, + "learning_rate": 8.244976485677641e-05, + "loss": 0.6295, + "step": 8310 + }, + { + "epoch": 0.35, + "learning_rate": 8.242838820008551e-05, + "loss": 0.5609, + "step": 8320 + }, + { + "epoch": 0.36, + "learning_rate": 8.240701154339462e-05, + "loss": 0.6311, + "step": 8330 + }, + { + "epoch": 0.36, + "learning_rate": 8.238563488670372e-05, + "loss": 0.5823, + "step": 8340 + }, + { + "epoch": 0.36, + "learning_rate": 8.236425823001283e-05, + "loss": 0.5694, + "step": 8350 + }, + { + "epoch": 0.36, + "learning_rate": 8.234288157332194e-05, + "loss": 0.6357, + "step": 8360 + }, + { + "epoch": 0.36, + "learning_rate": 8.232150491663105e-05, + "loss": 0.5648, + "step": 8370 + }, + { + "epoch": 0.36, + "learning_rate": 8.230012825994015e-05, + "loss": 0.628, + "step": 8380 + }, + { + "epoch": 0.36, + "learning_rate": 8.227875160324926e-05, + "loss": 0.5765, + "step": 8390 + }, + { + "epoch": 0.36, + "learning_rate": 8.225737494655836e-05, + "loss": 0.5685, + "step": 8400 + }, + { + "epoch": 0.36, + "learning_rate": 8.223599828986747e-05, + "loss": 0.6356, + "step": 8410 + }, + { + "epoch": 0.36, + "learning_rate": 8.221462163317658e-05, + "loss": 0.5612, + "step": 8420 + }, + { + "epoch": 0.36, + "learning_rate": 8.219324497648569e-05, + "loss": 0.6249, + "step": 8430 + }, + { + "epoch": 0.36, + "learning_rate": 8.217186831979479e-05, + "loss": 0.5797, + "step": 8440 + }, + { + "epoch": 0.36, + "learning_rate": 8.215049166310388e-05, + "loss": 0.5656, + "step": 8450 + }, + { + "epoch": 0.36, + "learning_rate": 8.2129115006413e-05, + "loss": 0.6283, + "step": 8460 + }, + { + "epoch": 0.36, + "learning_rate": 8.21077383497221e-05, + "loss": 0.5533, + "step": 8470 + }, + { + "epoch": 0.36, + "learning_rate": 8.208636169303121e-05, + "loss": 0.632, + "step": 8480 + }, + { + "epoch": 0.36, + "learning_rate": 8.206498503634033e-05, + "loss": 0.5861, + "step": 8490 + }, + { + "epoch": 0.36, + "learning_rate": 8.204360837964943e-05, + "loss": 0.5651, + "step": 8500 + }, + { + "epoch": 0.36, + "learning_rate": 8.202223172295852e-05, + "loss": 0.639, + "step": 8510 + }, + { + "epoch": 0.36, + "learning_rate": 8.200085506626763e-05, + "loss": 0.5659, + "step": 8520 + }, + { + "epoch": 0.36, + "learning_rate": 8.197947840957675e-05, + "loss": 0.621, + "step": 8530 + }, + { + "epoch": 0.36, + "learning_rate": 8.195810175288585e-05, + "loss": 0.5816, + "step": 8540 + }, + { + "epoch": 0.36, + "learning_rate": 8.193672509619496e-05, + "loss": 0.5661, + "step": 8550 + }, + { + "epoch": 0.37, + "learning_rate": 8.191534843950407e-05, + "loss": 0.6287, + "step": 8560 + }, + { + "epoch": 0.37, + "learning_rate": 8.189397178281318e-05, + "loss": 0.5672, + "step": 8570 + }, + { + "epoch": 0.37, + "learning_rate": 8.187259512612227e-05, + "loss": 0.6281, + "step": 8580 + }, + { + "epoch": 0.37, + "learning_rate": 8.185121846943139e-05, + "loss": 0.588, + "step": 8590 + }, + { + "epoch": 0.37, + "learning_rate": 8.182984181274049e-05, + "loss": 0.5646, + "step": 8600 + }, + { + "epoch": 0.37, + "learning_rate": 8.18084651560496e-05, + "loss": 0.631, + "step": 8610 + }, + { + "epoch": 0.37, + "learning_rate": 8.17870884993587e-05, + "loss": 0.5629, + "step": 8620 + }, + { + "epoch": 0.37, + "learning_rate": 8.176571184266782e-05, + "loss": 0.6205, + "step": 8630 + }, + { + "epoch": 0.37, + "learning_rate": 8.174433518597691e-05, + "loss": 0.5734, + "step": 8640 + }, + { + "epoch": 0.37, + "learning_rate": 8.172295852928601e-05, + "loss": 0.5663, + "step": 8650 + }, + { + "epoch": 0.37, + "learning_rate": 8.170158187259513e-05, + "loss": 0.6379, + "step": 8660 + }, + { + "epoch": 0.37, + "learning_rate": 8.168020521590424e-05, + "loss": 0.5636, + "step": 8670 + }, + { + "epoch": 0.37, + "learning_rate": 8.165882855921334e-05, + "loss": 0.6246, + "step": 8680 + }, + { + "epoch": 0.37, + "learning_rate": 8.163745190252246e-05, + "loss": 0.5738, + "step": 8690 + }, + { + "epoch": 0.37, + "learning_rate": 8.161607524583156e-05, + "loss": 0.5652, + "step": 8700 + }, + { + "epoch": 0.37, + "learning_rate": 8.159469858914066e-05, + "loss": 0.6369, + "step": 8710 + }, + { + "epoch": 0.37, + "learning_rate": 8.157332193244976e-05, + "loss": 0.5641, + "step": 8720 + }, + { + "epoch": 0.37, + "learning_rate": 8.155194527575888e-05, + "loss": 0.6235, + "step": 8730 + }, + { + "epoch": 0.37, + "learning_rate": 8.153056861906798e-05, + "loss": 0.575, + "step": 8740 + }, + { + "epoch": 0.37, + "learning_rate": 8.150919196237709e-05, + "loss": 0.5654, + "step": 8750 + }, + { + "epoch": 0.37, + "learning_rate": 8.14878153056862e-05, + "loss": 0.6288, + "step": 8760 + }, + { + "epoch": 0.37, + "learning_rate": 8.14664386489953e-05, + "loss": 0.5654, + "step": 8770 + }, + { + "epoch": 0.37, + "learning_rate": 8.14450619923044e-05, + "loss": 0.6312, + "step": 8780 + }, + { + "epoch": 0.37, + "learning_rate": 8.14236853356135e-05, + "loss": 0.5766, + "step": 8790 + }, + { + "epoch": 0.38, + "learning_rate": 8.140230867892262e-05, + "loss": 0.5603, + "step": 8800 + }, + { + "epoch": 0.38, + "learning_rate": 8.138093202223173e-05, + "loss": 0.6398, + "step": 8810 + }, + { + "epoch": 0.38, + "learning_rate": 8.135955536554083e-05, + "loss": 0.5615, + "step": 8820 + }, + { + "epoch": 0.38, + "learning_rate": 8.133817870884995e-05, + "loss": 0.6243, + "step": 8830 + }, + { + "epoch": 0.38, + "learning_rate": 8.131680205215904e-05, + "loss": 0.5705, + "step": 8840 + }, + { + "epoch": 0.38, + "learning_rate": 8.129542539546815e-05, + "loss": 0.5594, + "step": 8850 + }, + { + "epoch": 0.38, + "learning_rate": 8.127404873877726e-05, + "loss": 0.6236, + "step": 8860 + }, + { + "epoch": 0.38, + "learning_rate": 8.125267208208637e-05, + "loss": 0.5609, + "step": 8870 + }, + { + "epoch": 0.38, + "learning_rate": 8.123129542539547e-05, + "loss": 0.6247, + "step": 8880 + }, + { + "epoch": 0.38, + "learning_rate": 8.120991876870458e-05, + "loss": 0.5712, + "step": 8890 + }, + { + "epoch": 0.38, + "learning_rate": 8.118854211201368e-05, + "loss": 0.563, + "step": 8900 + }, + { + "epoch": 0.38, + "learning_rate": 8.116716545532279e-05, + "loss": 0.632, + "step": 8910 + }, + { + "epoch": 0.38, + "learning_rate": 8.114578879863189e-05, + "loss": 0.566, + "step": 8920 + }, + { + "epoch": 0.38, + "learning_rate": 8.112441214194101e-05, + "loss": 0.6196, + "step": 8930 + }, + { + "epoch": 0.38, + "learning_rate": 8.110303548525011e-05, + "loss": 0.5776, + "step": 8940 + }, + { + "epoch": 0.38, + "learning_rate": 8.108165882855922e-05, + "loss": 0.5607, + "step": 8950 + }, + { + "epoch": 0.38, + "learning_rate": 8.106028217186832e-05, + "loss": 0.6289, + "step": 8960 + }, + { + "epoch": 0.38, + "learning_rate": 8.103890551517743e-05, + "loss": 0.5669, + "step": 8970 + }, + { + "epoch": 0.38, + "learning_rate": 8.101752885848653e-05, + "loss": 0.6181, + "step": 8980 + }, + { + "epoch": 0.38, + "learning_rate": 8.099615220179564e-05, + "loss": 0.5894, + "step": 8990 + }, + { + "epoch": 0.38, + "learning_rate": 8.097477554510475e-05, + "loss": 0.5576, + "step": 9000 + }, + { + "epoch": 0.38, + "learning_rate": 8.095339888841386e-05, + "loss": 0.6289, + "step": 9010 + }, + { + "epoch": 0.38, + "learning_rate": 8.093202223172296e-05, + "loss": 0.5601, + "step": 9020 + }, + { + "epoch": 0.39, + "learning_rate": 8.091064557503207e-05, + "loss": 0.6134, + "step": 9030 + }, + { + "epoch": 0.39, + "learning_rate": 8.088926891834117e-05, + "loss": 0.5778, + "step": 9040 + }, + { + "epoch": 0.39, + "learning_rate": 8.086789226165028e-05, + "loss": 0.5657, + "step": 9050 + }, + { + "epoch": 0.39, + "learning_rate": 8.084651560495938e-05, + "loss": 0.6255, + "step": 9060 + }, + { + "epoch": 0.39, + "learning_rate": 8.08251389482685e-05, + "loss": 0.5682, + "step": 9070 + }, + { + "epoch": 0.39, + "learning_rate": 8.08037622915776e-05, + "loss": 0.6174, + "step": 9080 + }, + { + "epoch": 0.39, + "learning_rate": 8.078238563488671e-05, + "loss": 0.5725, + "step": 9090 + }, + { + "epoch": 0.39, + "learning_rate": 8.076100897819581e-05, + "loss": 0.5647, + "step": 9100 + }, + { + "epoch": 0.39, + "learning_rate": 8.073963232150492e-05, + "loss": 0.6373, + "step": 9110 + }, + { + "epoch": 0.39, + "learning_rate": 8.071825566481402e-05, + "loss": 0.5684, + "step": 9120 + }, + { + "epoch": 0.39, + "learning_rate": 8.069687900812314e-05, + "loss": 0.624, + "step": 9130 + }, + { + "epoch": 0.39, + "learning_rate": 8.067550235143224e-05, + "loss": 0.5755, + "step": 9140 + }, + { + "epoch": 0.39, + "learning_rate": 8.065412569474135e-05, + "loss": 0.5609, + "step": 9150 + }, + { + "epoch": 0.39, + "learning_rate": 8.063274903805045e-05, + "loss": 0.6277, + "step": 9160 + }, + { + "epoch": 0.39, + "learning_rate": 8.061137238135956e-05, + "loss": 0.5597, + "step": 9170 + }, + { + "epoch": 0.39, + "learning_rate": 8.058999572466866e-05, + "loss": 0.6225, + "step": 9180 + }, + { + "epoch": 0.39, + "learning_rate": 8.056861906797777e-05, + "loss": 0.5845, + "step": 9190 + }, + { + "epoch": 0.39, + "learning_rate": 8.054724241128688e-05, + "loss": 0.5635, + "step": 9200 + }, + { + "epoch": 0.39, + "learning_rate": 8.052586575459599e-05, + "loss": 0.6314, + "step": 9210 + }, + { + "epoch": 0.39, + "learning_rate": 8.05044890979051e-05, + "loss": 0.5622, + "step": 9220 + }, + { + "epoch": 0.39, + "learning_rate": 8.04831124412142e-05, + "loss": 0.616, + "step": 9230 + }, + { + "epoch": 0.39, + "learning_rate": 8.04617357845233e-05, + "loss": 0.5695, + "step": 9240 + }, + { + "epoch": 0.39, + "learning_rate": 8.044035912783241e-05, + "loss": 0.5652, + "step": 9250 + }, + { + "epoch": 0.4, + "learning_rate": 8.041898247114151e-05, + "loss": 0.6347, + "step": 9260 + }, + { + "epoch": 0.4, + "learning_rate": 8.039760581445063e-05, + "loss": 0.5591, + "step": 9270 + }, + { + "epoch": 0.4, + "learning_rate": 8.037622915775973e-05, + "loss": 0.6265, + "step": 9280 + }, + { + "epoch": 0.4, + "learning_rate": 8.035485250106884e-05, + "loss": 0.5802, + "step": 9290 + }, + { + "epoch": 0.4, + "learning_rate": 8.033347584437794e-05, + "loss": 0.5697, + "step": 9300 + }, + { + "epoch": 0.4, + "learning_rate": 8.031209918768705e-05, + "loss": 0.6226, + "step": 9310 + }, + { + "epoch": 0.4, + "learning_rate": 8.029072253099615e-05, + "loss": 0.5622, + "step": 9320 + }, + { + "epoch": 0.4, + "learning_rate": 8.026934587430526e-05, + "loss": 0.6127, + "step": 9330 + }, + { + "epoch": 0.4, + "learning_rate": 8.024796921761438e-05, + "loss": 0.5773, + "step": 9340 + }, + { + "epoch": 0.4, + "learning_rate": 8.022659256092348e-05, + "loss": 0.5543, + "step": 9350 + }, + { + "epoch": 0.4, + "learning_rate": 8.020521590423258e-05, + "loss": 0.6252, + "step": 9360 + }, + { + "epoch": 0.4, + "learning_rate": 8.018383924754169e-05, + "loss": 0.5658, + "step": 9370 + }, + { + "epoch": 0.4, + "learning_rate": 8.01624625908508e-05, + "loss": 0.6248, + "step": 9380 + }, + { + "epoch": 0.4, + "learning_rate": 8.01410859341599e-05, + "loss": 0.5814, + "step": 9390 + }, + { + "epoch": 0.4, + "learning_rate": 8.011970927746902e-05, + "loss": 0.5603, + "step": 9400 + }, + { + "epoch": 0.4, + "learning_rate": 8.009833262077812e-05, + "loss": 0.6265, + "step": 9410 + }, + { + "epoch": 0.4, + "learning_rate": 8.007695596408722e-05, + "loss": 0.5623, + "step": 9420 + }, + { + "epoch": 0.4, + "learning_rate": 8.005557930739632e-05, + "loss": 0.6081, + "step": 9430 + }, + { + "epoch": 0.4, + "learning_rate": 8.003420265070543e-05, + "loss": 0.5702, + "step": 9440 + }, + { + "epoch": 0.4, + "learning_rate": 8.001282599401454e-05, + "loss": 0.5622, + "step": 9450 + }, + { + "epoch": 0.4, + "learning_rate": 7.999144933732364e-05, + "loss": 0.625, + "step": 9460 + }, + { + "epoch": 0.4, + "learning_rate": 7.997007268063276e-05, + "loss": 0.5675, + "step": 9470 + }, + { + "epoch": 0.4, + "learning_rate": 7.994869602394187e-05, + "loss": 0.6168, + "step": 9480 + }, + { + "epoch": 0.4, + "learning_rate": 7.992731936725097e-05, + "loss": 0.5726, + "step": 9490 + }, + { + "epoch": 0.41, + "learning_rate": 7.990594271056007e-05, + "loss": 0.5665, + "step": 9500 + }, + { + "epoch": 0.41, + "learning_rate": 7.988456605386918e-05, + "loss": 0.6256, + "step": 9510 + }, + { + "epoch": 0.41, + "learning_rate": 7.986318939717828e-05, + "loss": 0.567, + "step": 9520 + }, + { + "epoch": 0.41, + "learning_rate": 7.984181274048739e-05, + "loss": 0.6187, + "step": 9530 + }, + { + "epoch": 0.41, + "learning_rate": 7.98204360837965e-05, + "loss": 0.5773, + "step": 9540 + }, + { + "epoch": 0.41, + "learning_rate": 7.979905942710561e-05, + "loss": 0.5619, + "step": 9550 + }, + { + "epoch": 0.41, + "learning_rate": 7.97776827704147e-05, + "loss": 0.6334, + "step": 9560 + }, + { + "epoch": 0.41, + "learning_rate": 7.975630611372382e-05, + "loss": 0.564, + "step": 9570 + }, + { + "epoch": 0.41, + "learning_rate": 7.973492945703292e-05, + "loss": 0.6292, + "step": 9580 + }, + { + "epoch": 0.41, + "learning_rate": 7.971355280034203e-05, + "loss": 0.5727, + "step": 9590 + }, + { + "epoch": 0.41, + "learning_rate": 7.969217614365113e-05, + "loss": 0.5622, + "step": 9600 + }, + { + "epoch": 0.41, + "learning_rate": 7.967079948696025e-05, + "loss": 0.625, + "step": 9610 + }, + { + "epoch": 0.41, + "learning_rate": 7.964942283026936e-05, + "loss": 0.5764, + "step": 9620 + }, + { + "epoch": 0.41, + "learning_rate": 7.962804617357845e-05, + "loss": 0.6248, + "step": 9630 + }, + { + "epoch": 0.41, + "learning_rate": 7.960666951688756e-05, + "loss": 0.5801, + "step": 9640 + }, + { + "epoch": 0.41, + "learning_rate": 7.958529286019667e-05, + "loss": 0.5671, + "step": 9650 + }, + { + "epoch": 0.41, + "learning_rate": 7.956391620350577e-05, + "loss": 0.6266, + "step": 9660 + }, + { + "epoch": 0.41, + "learning_rate": 7.954253954681489e-05, + "loss": 0.5657, + "step": 9670 + }, + { + "epoch": 0.41, + "learning_rate": 7.9521162890124e-05, + "loss": 0.6161, + "step": 9680 + }, + { + "epoch": 0.41, + "learning_rate": 7.949978623343309e-05, + "loss": 0.5695, + "step": 9690 + }, + { + "epoch": 0.41, + "learning_rate": 7.947840957674219e-05, + "loss": 0.5676, + "step": 9700 + }, + { + "epoch": 0.41, + "learning_rate": 7.945703292005131e-05, + "loss": 0.6205, + "step": 9710 + }, + { + "epoch": 0.41, + "learning_rate": 7.943565626336041e-05, + "loss": 0.5673, + "step": 9720 + }, + { + "epoch": 0.42, + "learning_rate": 7.941427960666952e-05, + "loss": 0.6226, + "step": 9730 + }, + { + "epoch": 0.42, + "learning_rate": 7.939290294997864e-05, + "loss": 0.56, + "step": 9740 + }, + { + "epoch": 0.42, + "learning_rate": 7.937152629328773e-05, + "loss": 0.5605, + "step": 9750 + }, + { + "epoch": 0.42, + "learning_rate": 7.935014963659683e-05, + "loss": 0.6242, + "step": 9760 + }, + { + "epoch": 0.42, + "learning_rate": 7.932877297990595e-05, + "loss": 0.5564, + "step": 9770 + }, + { + "epoch": 0.42, + "learning_rate": 7.930739632321506e-05, + "loss": 0.6175, + "step": 9780 + }, + { + "epoch": 0.42, + "learning_rate": 7.928601966652416e-05, + "loss": 0.5786, + "step": 9790 + }, + { + "epoch": 0.42, + "learning_rate": 7.926464300983326e-05, + "loss": 0.5682, + "step": 9800 + }, + { + "epoch": 0.42, + "learning_rate": 7.924326635314238e-05, + "loss": 0.6296, + "step": 9810 + }, + { + "epoch": 0.42, + "learning_rate": 7.922188969645147e-05, + "loss": 0.5602, + "step": 9820 + }, + { + "epoch": 0.42, + "learning_rate": 7.920051303976058e-05, + "loss": 0.6192, + "step": 9830 + }, + { + "epoch": 0.42, + "learning_rate": 7.91791363830697e-05, + "loss": 0.5676, + "step": 9840 + }, + { + "epoch": 0.42, + "learning_rate": 7.91577597263788e-05, + "loss": 0.5669, + "step": 9850 + }, + { + "epoch": 0.42, + "learning_rate": 7.91363830696879e-05, + "loss": 0.619, + "step": 9860 + }, + { + "epoch": 0.42, + "learning_rate": 7.911500641299701e-05, + "loss": 0.5639, + "step": 9870 + }, + { + "epoch": 0.42, + "learning_rate": 7.909362975630611e-05, + "loss": 0.6206, + "step": 9880 + }, + { + "epoch": 0.42, + "learning_rate": 7.907225309961522e-05, + "loss": 0.5823, + "step": 9890 + }, + { + "epoch": 0.42, + "learning_rate": 7.905087644292432e-05, + "loss": 0.5588, + "step": 9900 + }, + { + "epoch": 0.42, + "learning_rate": 7.902949978623344e-05, + "loss": 0.6308, + "step": 9910 + }, + { + "epoch": 0.42, + "learning_rate": 7.900812312954255e-05, + "loss": 0.5541, + "step": 9920 + }, + { + "epoch": 0.42, + "learning_rate": 7.898674647285165e-05, + "loss": 0.6127, + "step": 9930 + }, + { + "epoch": 0.42, + "learning_rate": 7.896536981616077e-05, + "loss": 0.5731, + "step": 9940 + }, + { + "epoch": 0.42, + "learning_rate": 7.894399315946986e-05, + "loss": 0.5519, + "step": 9950 + }, + { + "epoch": 0.42, + "learning_rate": 7.892261650277896e-05, + "loss": 0.629, + "step": 9960 + }, + { + "epoch": 0.43, + "learning_rate": 7.890123984608807e-05, + "loss": 0.5542, + "step": 9970 + }, + { + "epoch": 0.43, + "learning_rate": 7.887986318939719e-05, + "loss": 0.6201, + "step": 9980 + }, + { + "epoch": 0.43, + "learning_rate": 7.885848653270629e-05, + "loss": 0.5656, + "step": 9990 + }, + { + "epoch": 0.43, + "learning_rate": 7.88371098760154e-05, + "loss": 0.556, + "step": 10000 + }, + { + "epoch": 0.43, + "learning_rate": 7.88157332193245e-05, + "loss": 0.6346, + "step": 10010 + }, + { + "epoch": 0.43, + "learning_rate": 7.87943565626336e-05, + "loss": 0.5688, + "step": 10020 + }, + { + "epoch": 0.43, + "learning_rate": 7.877297990594271e-05, + "loss": 0.6181, + "step": 10030 + }, + { + "epoch": 0.43, + "learning_rate": 7.875160324925183e-05, + "loss": 0.5622, + "step": 10040 + }, + { + "epoch": 0.43, + "learning_rate": 7.873022659256093e-05, + "loss": 0.5596, + "step": 10050 + }, + { + "epoch": 0.43, + "learning_rate": 7.870884993587004e-05, + "loss": 0.6293, + "step": 10060 + }, + { + "epoch": 0.43, + "learning_rate": 7.868747327917914e-05, + "loss": 0.5648, + "step": 10070 + }, + { + "epoch": 0.43, + "learning_rate": 7.866609662248824e-05, + "loss": 0.6267, + "step": 10080 + }, + { + "epoch": 0.43, + "learning_rate": 7.864471996579735e-05, + "loss": 0.5801, + "step": 10090 + }, + { + "epoch": 0.43, + "learning_rate": 7.862334330910645e-05, + "loss": 0.5693, + "step": 10100 + }, + { + "epoch": 0.43, + "learning_rate": 7.860196665241557e-05, + "loss": 0.6275, + "step": 10110 + }, + { + "epoch": 0.43, + "learning_rate": 7.858058999572468e-05, + "loss": 0.5735, + "step": 10120 + }, + { + "epoch": 0.43, + "learning_rate": 7.855921333903378e-05, + "loss": 0.6187, + "step": 10130 + }, + { + "epoch": 0.43, + "learning_rate": 7.853783668234289e-05, + "loss": 0.569, + "step": 10140 + }, + { + "epoch": 0.43, + "learning_rate": 7.851646002565199e-05, + "loss": 0.5442, + "step": 10150 + }, + { + "epoch": 0.43, + "learning_rate": 7.84950833689611e-05, + "loss": 0.6257, + "step": 10160 + }, + { + "epoch": 0.43, + "learning_rate": 7.84737067122702e-05, + "loss": 0.5603, + "step": 10170 + }, + { + "epoch": 0.43, + "learning_rate": 7.845233005557932e-05, + "loss": 0.6104, + "step": 10180 + }, + { + "epoch": 0.43, + "learning_rate": 7.843095339888842e-05, + "loss": 0.5717, + "step": 10190 + }, + { + "epoch": 0.44, + "learning_rate": 7.840957674219753e-05, + "loss": 0.5603, + "step": 10200 + }, + { + "epoch": 0.44, + "learning_rate": 7.838820008550663e-05, + "loss": 0.613, + "step": 10210 + }, + { + "epoch": 0.44, + "learning_rate": 7.836682342881573e-05, + "loss": 0.5574, + "step": 10220 + }, + { + "epoch": 0.44, + "learning_rate": 7.834544677212484e-05, + "loss": 0.6197, + "step": 10230 + }, + { + "epoch": 0.44, + "learning_rate": 7.832407011543394e-05, + "loss": 0.5706, + "step": 10240 + }, + { + "epoch": 0.44, + "learning_rate": 7.830269345874306e-05, + "loss": 0.5641, + "step": 10250 + }, + { + "epoch": 0.44, + "learning_rate": 7.828131680205217e-05, + "loss": 0.6277, + "step": 10260 + }, + { + "epoch": 0.44, + "learning_rate": 7.825994014536127e-05, + "loss": 0.5599, + "step": 10270 + }, + { + "epoch": 0.44, + "learning_rate": 7.823856348867038e-05, + "loss": 0.6138, + "step": 10280 + }, + { + "epoch": 0.44, + "learning_rate": 7.821718683197948e-05, + "loss": 0.5742, + "step": 10290 + }, + { + "epoch": 0.44, + "learning_rate": 7.819581017528858e-05, + "loss": 0.5614, + "step": 10300 + }, + { + "epoch": 0.44, + "learning_rate": 7.81744335185977e-05, + "loss": 0.6212, + "step": 10310 + }, + { + "epoch": 0.44, + "learning_rate": 7.815305686190681e-05, + "loss": 0.55, + "step": 10320 + }, + { + "epoch": 0.44, + "learning_rate": 7.813168020521591e-05, + "loss": 0.6049, + "step": 10330 + }, + { + "epoch": 0.44, + "learning_rate": 7.811030354852502e-05, + "loss": 0.569, + "step": 10340 + }, + { + "epoch": 0.44, + "learning_rate": 7.808892689183412e-05, + "loss": 0.5514, + "step": 10350 + }, + { + "epoch": 0.44, + "learning_rate": 7.806755023514323e-05, + "loss": 0.6248, + "step": 10360 + }, + { + "epoch": 0.44, + "learning_rate": 7.804617357845233e-05, + "loss": 0.563, + "step": 10370 + }, + { + "epoch": 0.44, + "learning_rate": 7.802479692176145e-05, + "loss": 0.6117, + "step": 10380 + }, + { + "epoch": 0.44, + "learning_rate": 7.800342026507055e-05, + "loss": 0.567, + "step": 10390 + }, + { + "epoch": 0.44, + "learning_rate": 7.798204360837966e-05, + "loss": 0.554, + "step": 10400 + }, + { + "epoch": 0.44, + "learning_rate": 7.796066695168876e-05, + "loss": 0.6228, + "step": 10410 + }, + { + "epoch": 0.44, + "learning_rate": 7.793929029499787e-05, + "loss": 0.5516, + "step": 10420 + }, + { + "epoch": 0.44, + "learning_rate": 7.791791363830697e-05, + "loss": 0.6168, + "step": 10430 + }, + { + "epoch": 0.45, + "learning_rate": 7.789653698161607e-05, + "loss": 0.5747, + "step": 10440 + }, + { + "epoch": 0.45, + "learning_rate": 7.787516032492519e-05, + "loss": 0.5578, + "step": 10450 + }, + { + "epoch": 0.45, + "learning_rate": 7.78537836682343e-05, + "loss": 0.6337, + "step": 10460 + }, + { + "epoch": 0.45, + "learning_rate": 7.78324070115434e-05, + "loss": 0.5529, + "step": 10470 + }, + { + "epoch": 0.45, + "learning_rate": 7.78110303548525e-05, + "loss": 0.6234, + "step": 10480 + }, + { + "epoch": 0.45, + "learning_rate": 7.778965369816161e-05, + "loss": 0.5685, + "step": 10490 + }, + { + "epoch": 0.45, + "learning_rate": 7.776827704147072e-05, + "loss": 0.5613, + "step": 10500 + }, + { + "epoch": 0.45, + "learning_rate": 7.774690038477982e-05, + "loss": 0.6168, + "step": 10510 + }, + { + "epoch": 0.45, + "learning_rate": 7.772552372808894e-05, + "loss": 0.5514, + "step": 10520 + }, + { + "epoch": 0.45, + "learning_rate": 7.770414707139804e-05, + "loss": 0.6233, + "step": 10530 + }, + { + "epoch": 0.45, + "learning_rate": 7.768277041470713e-05, + "loss": 0.5803, + "step": 10540 + }, + { + "epoch": 0.45, + "learning_rate": 7.766139375801625e-05, + "loss": 0.5612, + "step": 10550 + }, + { + "epoch": 0.45, + "learning_rate": 7.764001710132536e-05, + "loss": 0.6265, + "step": 10560 + }, + { + "epoch": 0.45, + "learning_rate": 7.761864044463446e-05, + "loss": 0.5668, + "step": 10570 + }, + { + "epoch": 0.45, + "learning_rate": 7.759726378794358e-05, + "loss": 0.6183, + "step": 10580 + }, + { + "epoch": 0.45, + "learning_rate": 7.757588713125268e-05, + "loss": 0.5676, + "step": 10590 + }, + { + "epoch": 0.45, + "learning_rate": 7.755451047456179e-05, + "loss": 0.5597, + "step": 10600 + }, + { + "epoch": 0.45, + "learning_rate": 7.753313381787088e-05, + "loss": 0.6171, + "step": 10610 + }, + { + "epoch": 0.45, + "learning_rate": 7.751175716118e-05, + "loss": 0.5523, + "step": 10620 + }, + { + "epoch": 0.45, + "learning_rate": 7.74903805044891e-05, + "loss": 0.6123, + "step": 10630 + }, + { + "epoch": 0.45, + "learning_rate": 7.74690038477982e-05, + "loss": 0.5646, + "step": 10640 + }, + { + "epoch": 0.45, + "learning_rate": 7.744762719110732e-05, + "loss": 0.5457, + "step": 10650 + }, + { + "epoch": 0.45, + "learning_rate": 7.742625053441643e-05, + "loss": 0.6312, + "step": 10660 + }, + { + "epoch": 0.46, + "learning_rate": 7.740487387772552e-05, + "loss": 0.5652, + "step": 10670 + }, + { + "epoch": 0.46, + "learning_rate": 7.738349722103462e-05, + "loss": 0.6169, + "step": 10680 + }, + { + "epoch": 0.46, + "learning_rate": 7.736212056434374e-05, + "loss": 0.5657, + "step": 10690 + }, + { + "epoch": 0.46, + "learning_rate": 7.734074390765285e-05, + "loss": 0.5537, + "step": 10700 + }, + { + "epoch": 0.46, + "learning_rate": 7.731936725096195e-05, + "loss": 0.6173, + "step": 10710 + }, + { + "epoch": 0.46, + "learning_rate": 7.729799059427107e-05, + "loss": 0.5588, + "step": 10720 + }, + { + "epoch": 0.46, + "learning_rate": 7.727661393758017e-05, + "loss": 0.6074, + "step": 10730 + }, + { + "epoch": 0.46, + "learning_rate": 7.725523728088926e-05, + "loss": 0.565, + "step": 10740 + }, + { + "epoch": 0.46, + "learning_rate": 7.723386062419838e-05, + "loss": 0.5519, + "step": 10750 + }, + { + "epoch": 0.46, + "learning_rate": 7.721248396750749e-05, + "loss": 0.6164, + "step": 10760 + }, + { + "epoch": 0.46, + "learning_rate": 7.719110731081659e-05, + "loss": 0.5565, + "step": 10770 + }, + { + "epoch": 0.46, + "learning_rate": 7.71697306541257e-05, + "loss": 0.6169, + "step": 10780 + }, + { + "epoch": 0.46, + "learning_rate": 7.714835399743481e-05, + "loss": 0.5726, + "step": 10790 + }, + { + "epoch": 0.46, + "learning_rate": 7.71269773407439e-05, + "loss": 0.5591, + "step": 10800 + }, + { + "epoch": 0.46, + "learning_rate": 7.710560068405301e-05, + "loss": 0.6299, + "step": 10810 + }, + { + "epoch": 0.46, + "learning_rate": 7.708422402736213e-05, + "loss": 0.5664, + "step": 10820 + }, + { + "epoch": 0.46, + "learning_rate": 7.706284737067123e-05, + "loss": 0.6203, + "step": 10830 + }, + { + "epoch": 0.46, + "learning_rate": 7.704147071398034e-05, + "loss": 0.5733, + "step": 10840 + }, + { + "epoch": 0.46, + "learning_rate": 7.702009405728945e-05, + "loss": 0.554, + "step": 10850 + }, + { + "epoch": 0.46, + "learning_rate": 7.699871740059856e-05, + "loss": 0.6272, + "step": 10860 + }, + { + "epoch": 0.46, + "learning_rate": 7.697734074390765e-05, + "loss": 0.5607, + "step": 10870 + }, + { + "epoch": 0.46, + "learning_rate": 7.695596408721675e-05, + "loss": 0.6187, + "step": 10880 + }, + { + "epoch": 0.46, + "learning_rate": 7.693458743052587e-05, + "loss": 0.5696, + "step": 10890 + }, + { + "epoch": 0.47, + "learning_rate": 7.691321077383498e-05, + "loss": 0.5585, + "step": 10900 + }, + { + "epoch": 0.47, + "learning_rate": 7.689183411714408e-05, + "loss": 0.6249, + "step": 10910 + }, + { + "epoch": 0.47, + "learning_rate": 7.68704574604532e-05, + "loss": 0.5538, + "step": 10920 + }, + { + "epoch": 0.47, + "learning_rate": 7.684908080376229e-05, + "loss": 0.6157, + "step": 10930 + }, + { + "epoch": 0.47, + "learning_rate": 7.68277041470714e-05, + "loss": 0.5661, + "step": 10940 + }, + { + "epoch": 0.47, + "learning_rate": 7.68063274903805e-05, + "loss": 0.5522, + "step": 10950 + }, + { + "epoch": 0.47, + "learning_rate": 7.678495083368962e-05, + "loss": 0.6285, + "step": 10960 + }, + { + "epoch": 0.47, + "learning_rate": 7.676357417699872e-05, + "loss": 0.5496, + "step": 10970 + }, + { + "epoch": 0.47, + "learning_rate": 7.674219752030783e-05, + "loss": 0.6155, + "step": 10980 + }, + { + "epoch": 0.47, + "learning_rate": 7.672082086361693e-05, + "loss": 0.5656, + "step": 10990 + }, + { + "epoch": 0.47, + "learning_rate": 7.669944420692604e-05, + "loss": 0.561, + "step": 11000 + }, + { + "epoch": 0.47, + "learning_rate": 7.667806755023514e-05, + "loss": 0.6313, + "step": 11010 + }, + { + "epoch": 0.47, + "learning_rate": 7.665669089354426e-05, + "loss": 0.5597, + "step": 11020 + }, + { + "epoch": 0.47, + "learning_rate": 7.663531423685336e-05, + "loss": 0.6134, + "step": 11030 + }, + { + "epoch": 0.47, + "learning_rate": 7.661393758016247e-05, + "loss": 0.5615, + "step": 11040 + }, + { + "epoch": 0.47, + "learning_rate": 7.659256092347157e-05, + "loss": 0.5518, + "step": 11050 + }, + { + "epoch": 0.47, + "learning_rate": 7.657118426678068e-05, + "loss": 0.6231, + "step": 11060 + }, + { + "epoch": 0.47, + "learning_rate": 7.654980761008978e-05, + "loss": 0.559, + "step": 11070 + }, + { + "epoch": 0.47, + "learning_rate": 7.652843095339889e-05, + "loss": 0.6163, + "step": 11080 + }, + { + "epoch": 0.47, + "learning_rate": 7.6507054296708e-05, + "loss": 0.5667, + "step": 11090 + }, + { + "epoch": 0.47, + "learning_rate": 7.648567764001711e-05, + "loss": 0.5631, + "step": 11100 + }, + { + "epoch": 0.47, + "learning_rate": 7.646430098332621e-05, + "loss": 0.6288, + "step": 11110 + }, + { + "epoch": 0.47, + "learning_rate": 7.644292432663532e-05, + "loss": 0.5643, + "step": 11120 + }, + { + "epoch": 0.47, + "learning_rate": 7.642154766994442e-05, + "loss": 0.6178, + "step": 11130 + }, + { + "epoch": 0.48, + "learning_rate": 7.640017101325353e-05, + "loss": 0.5725, + "step": 11140 + }, + { + "epoch": 0.48, + "learning_rate": 7.637879435656263e-05, + "loss": 0.5579, + "step": 11150 + }, + { + "epoch": 0.48, + "learning_rate": 7.635741769987175e-05, + "loss": 0.631, + "step": 11160 + }, + { + "epoch": 0.48, + "learning_rate": 7.633604104318085e-05, + "loss": 0.5479, + "step": 11170 + }, + { + "epoch": 0.48, + "learning_rate": 7.631466438648996e-05, + "loss": 0.6115, + "step": 11180 + }, + { + "epoch": 0.48, + "learning_rate": 7.629328772979906e-05, + "loss": 0.5702, + "step": 11190 + }, + { + "epoch": 0.48, + "learning_rate": 7.627191107310817e-05, + "loss": 0.5543, + "step": 11200 + }, + { + "epoch": 0.48, + "learning_rate": 7.625053441641727e-05, + "loss": 0.6261, + "step": 11210 + }, + { + "epoch": 0.48, + "learning_rate": 7.622915775972638e-05, + "loss": 0.5602, + "step": 11220 + }, + { + "epoch": 0.48, + "learning_rate": 7.62077811030355e-05, + "loss": 0.6155, + "step": 11230 + }, + { + "epoch": 0.48, + "learning_rate": 7.61864044463446e-05, + "loss": 0.5688, + "step": 11240 + }, + { + "epoch": 0.48, + "learning_rate": 7.61650277896537e-05, + "loss": 0.5544, + "step": 11250 + }, + { + "epoch": 0.48, + "learning_rate": 7.614365113296281e-05, + "loss": 0.6304, + "step": 11260 + }, + { + "epoch": 0.48, + "learning_rate": 7.612227447627191e-05, + "loss": 0.5553, + "step": 11270 + }, + { + "epoch": 0.48, + "learning_rate": 7.610089781958102e-05, + "loss": 0.6187, + "step": 11280 + }, + { + "epoch": 0.48, + "learning_rate": 7.607952116289013e-05, + "loss": 0.5706, + "step": 11290 + }, + { + "epoch": 0.48, + "learning_rate": 7.605814450619924e-05, + "loss": 0.5547, + "step": 11300 + }, + { + "epoch": 0.48, + "learning_rate": 7.603676784950834e-05, + "loss": 0.62, + "step": 11310 + }, + { + "epoch": 0.48, + "learning_rate": 7.601539119281745e-05, + "loss": 0.5592, + "step": 11320 + }, + { + "epoch": 0.48, + "learning_rate": 7.599401453612655e-05, + "loss": 0.6133, + "step": 11330 + }, + { + "epoch": 0.48, + "learning_rate": 7.597263787943566e-05, + "loss": 0.5815, + "step": 11340 + }, + { + "epoch": 0.48, + "learning_rate": 7.595126122274476e-05, + "loss": 0.5567, + "step": 11350 + }, + { + "epoch": 0.48, + "learning_rate": 7.592988456605388e-05, + "loss": 0.623, + "step": 11360 + }, + { + "epoch": 0.49, + "learning_rate": 7.590850790936298e-05, + "loss": 0.5623, + "step": 11370 + }, + { + "epoch": 0.49, + "learning_rate": 7.588713125267209e-05, + "loss": 0.6251, + "step": 11380 + }, + { + "epoch": 0.49, + "learning_rate": 7.586575459598119e-05, + "loss": 0.5729, + "step": 11390 + }, + { + "epoch": 0.49, + "learning_rate": 7.58443779392903e-05, + "loss": 0.5551, + "step": 11400 + }, + { + "epoch": 0.49, + "learning_rate": 7.58230012825994e-05, + "loss": 0.6208, + "step": 11410 + }, + { + "epoch": 0.49, + "learning_rate": 7.58016246259085e-05, + "loss": 0.5593, + "step": 11420 + }, + { + "epoch": 0.49, + "learning_rate": 7.578024796921762e-05, + "loss": 0.6249, + "step": 11430 + }, + { + "epoch": 0.49, + "learning_rate": 7.575887131252673e-05, + "loss": 0.5602, + "step": 11440 + }, + { + "epoch": 0.49, + "learning_rate": 7.573749465583583e-05, + "loss": 0.5558, + "step": 11450 + }, + { + "epoch": 0.49, + "learning_rate": 7.571611799914494e-05, + "loss": 0.6125, + "step": 11460 + }, + { + "epoch": 0.49, + "learning_rate": 7.569474134245404e-05, + "loss": 0.5575, + "step": 11470 + }, + { + "epoch": 0.49, + "learning_rate": 7.567336468576315e-05, + "loss": 0.6187, + "step": 11480 + }, + { + "epoch": 0.49, + "learning_rate": 7.565198802907225e-05, + "loss": 0.5689, + "step": 11490 + }, + { + "epoch": 0.49, + "learning_rate": 7.563061137238137e-05, + "loss": 0.553, + "step": 11500 + }, + { + "epoch": 0.49, + "learning_rate": 7.560923471569047e-05, + "loss": 0.622, + "step": 11510 + }, + { + "epoch": 0.49, + "learning_rate": 7.558785805899958e-05, + "loss": 0.5622, + "step": 11520 + }, + { + "epoch": 0.49, + "learning_rate": 7.556648140230868e-05, + "loss": 0.6229, + "step": 11530 + }, + { + "epoch": 0.49, + "learning_rate": 7.554510474561779e-05, + "loss": 0.5647, + "step": 11540 + }, + { + "epoch": 0.49, + "learning_rate": 7.552372808892689e-05, + "loss": 0.5506, + "step": 11550 + }, + { + "epoch": 0.49, + "learning_rate": 7.550235143223601e-05, + "loss": 0.6218, + "step": 11560 + }, + { + "epoch": 0.49, + "learning_rate": 7.548097477554511e-05, + "loss": 0.5543, + "step": 11570 + }, + { + "epoch": 0.49, + "learning_rate": 7.545959811885422e-05, + "loss": 0.6117, + "step": 11580 + }, + { + "epoch": 0.49, + "learning_rate": 7.543822146216331e-05, + "loss": 0.569, + "step": 11590 + }, + { + "epoch": 0.49, + "learning_rate": 7.541684480547243e-05, + "loss": 0.56, + "step": 11600 + }, + { + "epoch": 0.5, + "learning_rate": 7.539546814878153e-05, + "loss": 0.6231, + "step": 11610 + }, + { + "epoch": 0.5, + "learning_rate": 7.537409149209064e-05, + "loss": 0.565, + "step": 11620 + }, + { + "epoch": 0.5, + "learning_rate": 7.535271483539976e-05, + "loss": 0.6171, + "step": 11630 + }, + { + "epoch": 0.5, + "learning_rate": 7.533133817870886e-05, + "loss": 0.5682, + "step": 11640 + }, + { + "epoch": 0.5, + "learning_rate": 7.530996152201796e-05, + "loss": 0.5618, + "step": 11650 + }, + { + "epoch": 0.5, + "learning_rate": 7.528858486532707e-05, + "loss": 0.6262, + "step": 11660 + }, + { + "epoch": 0.5, + "learning_rate": 7.526720820863617e-05, + "loss": 0.5622, + "step": 11670 + }, + { + "epoch": 0.5, + "learning_rate": 7.524583155194528e-05, + "loss": 0.6232, + "step": 11680 + }, + { + "epoch": 0.5, + "learning_rate": 7.522445489525438e-05, + "loss": 0.56, + "step": 11690 + }, + { + "epoch": 0.5, + "learning_rate": 7.52030782385635e-05, + "loss": 0.5568, + "step": 11700 + }, + { + "epoch": 0.5, + "learning_rate": 7.51817015818726e-05, + "loss": 0.6131, + "step": 11710 + }, + { + "epoch": 0.5, + "learning_rate": 7.51603249251817e-05, + "loss": 0.557, + "step": 11720 + }, + { + "epoch": 0.5, + "learning_rate": 7.513894826849081e-05, + "loss": 0.6222, + "step": 11730 + }, + { + "epoch": 0.5, + "learning_rate": 7.511757161179992e-05, + "loss": 0.5621, + "step": 11740 + }, + { + "epoch": 0.5, + "learning_rate": 7.509619495510902e-05, + "loss": 0.5572, + "step": 11750 + }, + { + "epoch": 0.5, + "learning_rate": 7.507481829841813e-05, + "loss": 0.6251, + "step": 11760 + }, + { + "epoch": 0.5, + "learning_rate": 7.505344164172725e-05, + "loss": 0.5536, + "step": 11770 + }, + { + "epoch": 0.5, + "learning_rate": 7.503206498503634e-05, + "loss": 0.6123, + "step": 11780 + }, + { + "epoch": 0.5, + "learning_rate": 7.501068832834544e-05, + "loss": 0.5724, + "step": 11790 + }, + { + "epoch": 0.5, + "learning_rate": 7.498931167165456e-05, + "loss": 0.5607, + "step": 11800 + }, + { + "epoch": 0.5, + "learning_rate": 7.496793501496366e-05, + "loss": 0.6355, + "step": 11810 + }, + { + "epoch": 0.5, + "learning_rate": 7.494655835827277e-05, + "loss": 0.5486, + "step": 11820 + }, + { + "epoch": 0.5, + "learning_rate": 7.492518170158189e-05, + "loss": 0.6026, + "step": 11830 + }, + { + "epoch": 0.51, + "learning_rate": 7.490380504489099e-05, + "loss": 0.5734, + "step": 11840 + }, + { + "epoch": 0.51, + "learning_rate": 7.488242838820008e-05, + "loss": 0.5543, + "step": 11850 + }, + { + "epoch": 0.51, + "learning_rate": 7.486105173150919e-05, + "loss": 0.616, + "step": 11860 + }, + { + "epoch": 0.51, + "learning_rate": 7.48396750748183e-05, + "loss": 0.5626, + "step": 11870 + }, + { + "epoch": 0.51, + "learning_rate": 7.481829841812741e-05, + "loss": 0.614, + "step": 11880 + }, + { + "epoch": 0.51, + "learning_rate": 7.479692176143651e-05, + "loss": 0.5675, + "step": 11890 + }, + { + "epoch": 0.51, + "learning_rate": 7.477554510474563e-05, + "loss": 0.5544, + "step": 11900 + }, + { + "epoch": 0.51, + "learning_rate": 7.475416844805472e-05, + "loss": 0.625, + "step": 11910 + }, + { + "epoch": 0.51, + "learning_rate": 7.473279179136383e-05, + "loss": 0.5612, + "step": 11920 + }, + { + "epoch": 0.51, + "learning_rate": 7.471141513467293e-05, + "loss": 0.6091, + "step": 11930 + }, + { + "epoch": 0.51, + "learning_rate": 7.469003847798205e-05, + "loss": 0.5697, + "step": 11940 + }, + { + "epoch": 0.51, + "learning_rate": 7.466866182129115e-05, + "loss": 0.5563, + "step": 11950 + }, + { + "epoch": 0.51, + "learning_rate": 7.464728516460026e-05, + "loss": 0.6193, + "step": 11960 + }, + { + "epoch": 0.51, + "learning_rate": 7.462590850790938e-05, + "loss": 0.5459, + "step": 11970 + }, + { + "epoch": 0.51, + "learning_rate": 7.460453185121847e-05, + "loss": 0.6076, + "step": 11980 + }, + { + "epoch": 0.51, + "learning_rate": 7.458315519452757e-05, + "loss": 0.5711, + "step": 11990 + }, + { + "epoch": 0.51, + "learning_rate": 7.456177853783669e-05, + "loss": 0.5569, + "step": 12000 + }, + { + "epoch": 0.51, + "learning_rate": 7.45404018811458e-05, + "loss": 0.615, + "step": 12010 + }, + { + "epoch": 0.51, + "learning_rate": 7.45190252244549e-05, + "loss": 0.5621, + "step": 12020 + }, + { + "epoch": 0.51, + "learning_rate": 7.4497648567764e-05, + "loss": 0.6007, + "step": 12030 + }, + { + "epoch": 0.51, + "learning_rate": 7.447627191107311e-05, + "loss": 0.5629, + "step": 12040 + }, + { + "epoch": 0.51, + "learning_rate": 7.445489525438221e-05, + "loss": 0.559, + "step": 12050 + }, + { + "epoch": 0.51, + "learning_rate": 7.443351859769132e-05, + "loss": 0.6299, + "step": 12060 + }, + { + "epoch": 0.51, + "learning_rate": 7.441214194100044e-05, + "loss": 0.5519, + "step": 12070 + }, + { + "epoch": 0.52, + "learning_rate": 7.439076528430954e-05, + "loss": 0.6161, + "step": 12080 + }, + { + "epoch": 0.52, + "learning_rate": 7.436938862761864e-05, + "loss": 0.5714, + "step": 12090 + }, + { + "epoch": 0.52, + "learning_rate": 7.434801197092776e-05, + "loss": 0.5558, + "step": 12100 + }, + { + "epoch": 0.52, + "learning_rate": 7.432663531423685e-05, + "loss": 0.6178, + "step": 12110 + }, + { + "epoch": 0.52, + "learning_rate": 7.430525865754596e-05, + "loss": 0.5439, + "step": 12120 + }, + { + "epoch": 0.52, + "learning_rate": 7.428388200085506e-05, + "loss": 0.595, + "step": 12130 + }, + { + "epoch": 0.52, + "learning_rate": 7.426250534416418e-05, + "loss": 0.5671, + "step": 12140 + }, + { + "epoch": 0.52, + "learning_rate": 7.424112868747328e-05, + "loss": 0.5569, + "step": 12150 + }, + { + "epoch": 0.52, + "learning_rate": 7.421975203078239e-05, + "loss": 0.6265, + "step": 12160 + }, + { + "epoch": 0.52, + "learning_rate": 7.41983753740915e-05, + "loss": 0.5506, + "step": 12170 + }, + { + "epoch": 0.52, + "learning_rate": 7.41769987174006e-05, + "loss": 0.6197, + "step": 12180 + }, + { + "epoch": 0.52, + "learning_rate": 7.41556220607097e-05, + "loss": 0.5604, + "step": 12190 + }, + { + "epoch": 0.52, + "learning_rate": 7.413424540401881e-05, + "loss": 0.5427, + "step": 12200 + }, + { + "epoch": 0.52, + "learning_rate": 7.411286874732793e-05, + "loss": 0.6256, + "step": 12210 + }, + { + "epoch": 0.52, + "learning_rate": 7.409149209063703e-05, + "loss": 0.5521, + "step": 12220 + }, + { + "epoch": 0.52, + "learning_rate": 7.407011543394613e-05, + "loss": 0.6234, + "step": 12230 + }, + { + "epoch": 0.52, + "learning_rate": 7.404873877725524e-05, + "loss": 0.5811, + "step": 12240 + }, + { + "epoch": 0.52, + "learning_rate": 7.402736212056434e-05, + "loss": 0.5592, + "step": 12250 + }, + { + "epoch": 0.52, + "learning_rate": 7.400598546387345e-05, + "loss": 0.6214, + "step": 12260 + }, + { + "epoch": 0.52, + "learning_rate": 7.398460880718257e-05, + "loss": 0.5693, + "step": 12270 + }, + { + "epoch": 0.52, + "learning_rate": 7.396323215049167e-05, + "loss": 0.6104, + "step": 12280 + }, + { + "epoch": 0.52, + "learning_rate": 7.394185549380078e-05, + "loss": 0.5646, + "step": 12290 + }, + { + "epoch": 0.52, + "learning_rate": 7.392047883710988e-05, + "loss": 0.5454, + "step": 12300 + }, + { + "epoch": 0.53, + "learning_rate": 7.389910218041898e-05, + "loss": 0.6205, + "step": 12310 + }, + { + "epoch": 0.53, + "learning_rate": 7.387772552372809e-05, + "loss": 0.5636, + "step": 12320 + }, + { + "epoch": 0.53, + "learning_rate": 7.385634886703719e-05, + "loss": 0.6103, + "step": 12330 + }, + { + "epoch": 0.53, + "learning_rate": 7.383497221034631e-05, + "loss": 0.564, + "step": 12340 + }, + { + "epoch": 0.53, + "learning_rate": 7.381359555365542e-05, + "loss": 0.5465, + "step": 12350 + }, + { + "epoch": 0.53, + "learning_rate": 7.379221889696452e-05, + "loss": 0.6199, + "step": 12360 + }, + { + "epoch": 0.53, + "learning_rate": 7.377084224027362e-05, + "loss": 0.5572, + "step": 12370 + }, + { + "epoch": 0.53, + "learning_rate": 7.374946558358273e-05, + "loss": 0.6157, + "step": 12380 + }, + { + "epoch": 0.53, + "learning_rate": 7.372808892689183e-05, + "loss": 0.5651, + "step": 12390 + }, + { + "epoch": 0.53, + "learning_rate": 7.370671227020094e-05, + "loss": 0.5573, + "step": 12400 + }, + { + "epoch": 0.53, + "learning_rate": 7.368533561351006e-05, + "loss": 0.6123, + "step": 12410 + }, + { + "epoch": 0.53, + "learning_rate": 7.366395895681916e-05, + "loss": 0.5453, + "step": 12420 + }, + { + "epoch": 0.53, + "learning_rate": 7.364258230012827e-05, + "loss": 0.6098, + "step": 12430 + }, + { + "epoch": 0.53, + "learning_rate": 7.362120564343737e-05, + "loss": 0.5699, + "step": 12440 + }, + { + "epoch": 0.53, + "learning_rate": 7.359982898674647e-05, + "loss": 0.5535, + "step": 12450 + }, + { + "epoch": 0.53, + "learning_rate": 7.357845233005558e-05, + "loss": 0.6205, + "step": 12460 + }, + { + "epoch": 0.53, + "learning_rate": 7.355707567336468e-05, + "loss": 0.5481, + "step": 12470 + }, + { + "epoch": 0.53, + "learning_rate": 7.35356990166738e-05, + "loss": 0.6061, + "step": 12480 + }, + { + "epoch": 0.53, + "learning_rate": 7.35143223599829e-05, + "loss": 0.5694, + "step": 12490 + }, + { + "epoch": 0.53, + "learning_rate": 7.349294570329201e-05, + "loss": 0.5581, + "step": 12500 + }, + { + "epoch": 0.53, + "learning_rate": 7.347156904660112e-05, + "loss": 0.6265, + "step": 12510 + }, + { + "epoch": 0.53, + "learning_rate": 7.345019238991022e-05, + "loss": 0.5452, + "step": 12520 + }, + { + "epoch": 0.53, + "learning_rate": 7.342881573321932e-05, + "loss": 0.6147, + "step": 12530 + }, + { + "epoch": 0.53, + "learning_rate": 7.340743907652844e-05, + "loss": 0.5748, + "step": 12540 + }, + { + "epoch": 0.54, + "learning_rate": 7.338606241983755e-05, + "loss": 0.5463, + "step": 12550 + }, + { + "epoch": 0.54, + "learning_rate": 7.336468576314665e-05, + "loss": 0.6215, + "step": 12560 + }, + { + "epoch": 0.54, + "learning_rate": 7.334330910645574e-05, + "loss": 0.5459, + "step": 12570 + }, + { + "epoch": 0.54, + "learning_rate": 7.332193244976486e-05, + "loss": 0.6213, + "step": 12580 + }, + { + "epoch": 0.54, + "learning_rate": 7.330055579307396e-05, + "loss": 0.5602, + "step": 12590 + }, + { + "epoch": 0.54, + "learning_rate": 7.327917913638307e-05, + "loss": 0.5452, + "step": 12600 + }, + { + "epoch": 0.54, + "learning_rate": 7.325780247969219e-05, + "loss": 0.6142, + "step": 12610 + }, + { + "epoch": 0.54, + "learning_rate": 7.323642582300129e-05, + "loss": 0.5541, + "step": 12620 + }, + { + "epoch": 0.54, + "learning_rate": 7.32150491663104e-05, + "loss": 0.6155, + "step": 12630 + }, + { + "epoch": 0.54, + "learning_rate": 7.31936725096195e-05, + "loss": 0.5671, + "step": 12640 + }, + { + "epoch": 0.54, + "learning_rate": 7.31722958529286e-05, + "loss": 0.5519, + "step": 12650 + }, + { + "epoch": 0.54, + "learning_rate": 7.315091919623771e-05, + "loss": 0.6196, + "step": 12660 + }, + { + "epoch": 0.54, + "learning_rate": 7.312954253954681e-05, + "loss": 0.5538, + "step": 12670 + }, + { + "epoch": 0.54, + "learning_rate": 7.310816588285593e-05, + "loss": 0.6077, + "step": 12680 + }, + { + "epoch": 0.54, + "learning_rate": 7.308678922616504e-05, + "loss": 0.5656, + "step": 12690 + }, + { + "epoch": 0.54, + "learning_rate": 7.306541256947413e-05, + "loss": 0.5537, + "step": 12700 + }, + { + "epoch": 0.54, + "learning_rate": 7.304403591278325e-05, + "loss": 0.6226, + "step": 12710 + }, + { + "epoch": 0.54, + "learning_rate": 7.302265925609235e-05, + "loss": 0.5604, + "step": 12720 + }, + { + "epoch": 0.54, + "learning_rate": 7.300128259940145e-05, + "loss": 0.6109, + "step": 12730 + }, + { + "epoch": 0.54, + "learning_rate": 7.297990594271056e-05, + "loss": 0.5684, + "step": 12740 + }, + { + "epoch": 0.54, + "learning_rate": 7.295852928601968e-05, + "loss": 0.5563, + "step": 12750 + }, + { + "epoch": 0.54, + "learning_rate": 7.293715262932878e-05, + "loss": 0.6166, + "step": 12760 + }, + { + "epoch": 0.54, + "learning_rate": 7.291577597263787e-05, + "loss": 0.5489, + "step": 12770 + }, + { + "epoch": 0.55, + "learning_rate": 7.289439931594699e-05, + "loss": 0.6062, + "step": 12780 + }, + { + "epoch": 0.55, + "learning_rate": 7.28730226592561e-05, + "loss": 0.5662, + "step": 12790 + }, + { + "epoch": 0.55, + "learning_rate": 7.28516460025652e-05, + "loss": 0.5488, + "step": 12800 + }, + { + "epoch": 0.55, + "learning_rate": 7.283026934587432e-05, + "loss": 0.6219, + "step": 12810 + }, + { + "epoch": 0.55, + "learning_rate": 7.280889268918342e-05, + "loss": 0.5632, + "step": 12820 + }, + { + "epoch": 0.55, + "learning_rate": 7.278751603249251e-05, + "loss": 0.6184, + "step": 12830 + }, + { + "epoch": 0.55, + "learning_rate": 7.276613937580162e-05, + "loss": 0.5746, + "step": 12840 + }, + { + "epoch": 0.55, + "learning_rate": 7.274476271911074e-05, + "loss": 0.5566, + "step": 12850 + }, + { + "epoch": 0.55, + "learning_rate": 7.272338606241984e-05, + "loss": 0.6176, + "step": 12860 + }, + { + "epoch": 0.55, + "learning_rate": 7.270200940572895e-05, + "loss": 0.5477, + "step": 12870 + }, + { + "epoch": 0.55, + "learning_rate": 7.268063274903806e-05, + "loss": 0.6051, + "step": 12880 + }, + { + "epoch": 0.55, + "learning_rate": 7.265925609234717e-05, + "loss": 0.5711, + "step": 12890 + }, + { + "epoch": 0.55, + "learning_rate": 7.263787943565626e-05, + "loss": 0.5521, + "step": 12900 + }, + { + "epoch": 0.55, + "learning_rate": 7.261650277896538e-05, + "loss": 0.6286, + "step": 12910 + }, + { + "epoch": 0.55, + "learning_rate": 7.259512612227448e-05, + "loss": 0.5531, + "step": 12920 + }, + { + "epoch": 0.55, + "learning_rate": 7.257374946558359e-05, + "loss": 0.6108, + "step": 12930 + }, + { + "epoch": 0.55, + "learning_rate": 7.255237280889269e-05, + "loss": 0.5689, + "step": 12940 + }, + { + "epoch": 0.55, + "learning_rate": 7.253099615220181e-05, + "loss": 0.5504, + "step": 12950 + }, + { + "epoch": 0.55, + "learning_rate": 7.25096194955109e-05, + "loss": 0.6156, + "step": 12960 + }, + { + "epoch": 0.55, + "learning_rate": 7.248824283882e-05, + "loss": 0.5504, + "step": 12970 + }, + { + "epoch": 0.55, + "learning_rate": 7.246686618212912e-05, + "loss": 0.6042, + "step": 12980 + }, + { + "epoch": 0.55, + "learning_rate": 7.244548952543823e-05, + "loss": 0.5681, + "step": 12990 + }, + { + "epoch": 0.55, + "learning_rate": 7.242411286874733e-05, + "loss": 0.5633, + "step": 13000 + }, + { + "epoch": 0.56, + "learning_rate": 7.240273621205644e-05, + "loss": 0.6267, + "step": 13010 + }, + { + "epoch": 0.56, + "learning_rate": 7.238135955536554e-05, + "loss": 0.5553, + "step": 13020 + }, + { + "epoch": 0.56, + "learning_rate": 7.235998289867464e-05, + "loss": 0.6106, + "step": 13030 + }, + { + "epoch": 0.56, + "learning_rate": 7.233860624198375e-05, + "loss": 0.5787, + "step": 13040 + }, + { + "epoch": 0.56, + "learning_rate": 7.231722958529287e-05, + "loss": 0.5542, + "step": 13050 + }, + { + "epoch": 0.56, + "learning_rate": 7.229585292860197e-05, + "loss": 0.6155, + "step": 13060 + }, + { + "epoch": 0.56, + "learning_rate": 7.227447627191108e-05, + "loss": 0.5513, + "step": 13070 + }, + { + "epoch": 0.56, + "learning_rate": 7.22530996152202e-05, + "loss": 0.6095, + "step": 13080 + }, + { + "epoch": 0.56, + "learning_rate": 7.223172295852929e-05, + "loss": 0.5629, + "step": 13090 + }, + { + "epoch": 0.56, + "learning_rate": 7.221034630183839e-05, + "loss": 0.5403, + "step": 13100 + }, + { + "epoch": 0.56, + "learning_rate": 7.21889696451475e-05, + "loss": 0.6222, + "step": 13110 + }, + { + "epoch": 0.56, + "learning_rate": 7.216759298845661e-05, + "loss": 0.5589, + "step": 13120 + }, + { + "epoch": 0.56, + "learning_rate": 7.214621633176572e-05, + "loss": 0.6008, + "step": 13130 + }, + { + "epoch": 0.56, + "learning_rate": 7.212483967507482e-05, + "loss": 0.5636, + "step": 13140 + }, + { + "epoch": 0.56, + "learning_rate": 7.210346301838393e-05, + "loss": 0.5466, + "step": 13150 + }, + { + "epoch": 0.56, + "learning_rate": 7.208208636169303e-05, + "loss": 0.6106, + "step": 13160 + }, + { + "epoch": 0.56, + "learning_rate": 7.206070970500213e-05, + "loss": 0.5593, + "step": 13170 + }, + { + "epoch": 0.56, + "learning_rate": 7.203933304831125e-05, + "loss": 0.6045, + "step": 13180 + }, + { + "epoch": 0.56, + "learning_rate": 7.201795639162036e-05, + "loss": 0.5636, + "step": 13190 + }, + { + "epoch": 0.56, + "learning_rate": 7.199657973492946e-05, + "loss": 0.5514, + "step": 13200 + }, + { + "epoch": 0.56, + "learning_rate": 7.197520307823857e-05, + "loss": 0.6155, + "step": 13210 + }, + { + "epoch": 0.56, + "learning_rate": 7.195382642154767e-05, + "loss": 0.5519, + "step": 13220 + }, + { + "epoch": 0.56, + "learning_rate": 7.193244976485678e-05, + "loss": 0.615, + "step": 13230 + }, + { + "epoch": 0.56, + "learning_rate": 7.191107310816588e-05, + "loss": 0.562, + "step": 13240 + }, + { + "epoch": 0.57, + "learning_rate": 7.1889696451475e-05, + "loss": 0.5528, + "step": 13250 + }, + { + "epoch": 0.57, + "learning_rate": 7.18683197947841e-05, + "loss": 0.6204, + "step": 13260 + }, + { + "epoch": 0.57, + "learning_rate": 7.184694313809321e-05, + "loss": 0.546, + "step": 13270 + }, + { + "epoch": 0.57, + "learning_rate": 7.182556648140231e-05, + "loss": 0.6118, + "step": 13280 + }, + { + "epoch": 0.57, + "learning_rate": 7.180418982471142e-05, + "loss": 0.5642, + "step": 13290 + }, + { + "epoch": 0.57, + "learning_rate": 7.178281316802052e-05, + "loss": 0.5498, + "step": 13300 + }, + { + "epoch": 0.57, + "learning_rate": 7.176143651132962e-05, + "loss": 0.6148, + "step": 13310 + }, + { + "epoch": 0.57, + "learning_rate": 7.174005985463874e-05, + "loss": 0.5511, + "step": 13320 + }, + { + "epoch": 0.57, + "learning_rate": 7.171868319794785e-05, + "loss": 0.6013, + "step": 13330 + }, + { + "epoch": 0.57, + "learning_rate": 7.169730654125695e-05, + "loss": 0.5647, + "step": 13340 + }, + { + "epoch": 0.57, + "learning_rate": 7.167592988456606e-05, + "loss": 0.5514, + "step": 13350 + }, + { + "epoch": 0.57, + "learning_rate": 7.165455322787516e-05, + "loss": 0.6165, + "step": 13360 + }, + { + "epoch": 0.57, + "learning_rate": 7.163317657118427e-05, + "loss": 0.5494, + "step": 13370 + }, + { + "epoch": 0.57, + "learning_rate": 7.161179991449337e-05, + "loss": 0.6209, + "step": 13380 + }, + { + "epoch": 0.57, + "learning_rate": 7.159042325780249e-05, + "loss": 0.5546, + "step": 13390 + }, + { + "epoch": 0.57, + "learning_rate": 7.156904660111159e-05, + "loss": 0.553, + "step": 13400 + }, + { + "epoch": 0.57, + "learning_rate": 7.15476699444207e-05, + "loss": 0.6189, + "step": 13410 + }, + { + "epoch": 0.57, + "learning_rate": 7.15262932877298e-05, + "loss": 0.5616, + "step": 13420 + }, + { + "epoch": 0.57, + "learning_rate": 7.15049166310389e-05, + "loss": 0.6123, + "step": 13430 + }, + { + "epoch": 0.57, + "learning_rate": 7.148353997434801e-05, + "loss": 0.5707, + "step": 13440 + }, + { + "epoch": 0.57, + "learning_rate": 7.146216331765713e-05, + "loss": 0.543, + "step": 13450 + }, + { + "epoch": 0.57, + "learning_rate": 7.144078666096623e-05, + "loss": 0.616, + "step": 13460 + }, + { + "epoch": 0.57, + "learning_rate": 7.141941000427534e-05, + "loss": 0.5563, + "step": 13470 + }, + { + "epoch": 0.58, + "learning_rate": 7.139803334758444e-05, + "loss": 0.6117, + "step": 13480 + }, + { + "epoch": 0.58, + "learning_rate": 7.137665669089355e-05, + "loss": 0.5642, + "step": 13490 + }, + { + "epoch": 0.58, + "learning_rate": 7.135528003420265e-05, + "loss": 0.5511, + "step": 13500 + }, + { + "epoch": 0.58, + "learning_rate": 7.133390337751176e-05, + "loss": 0.6144, + "step": 13510 + }, + { + "epoch": 0.58, + "learning_rate": 7.131252672082087e-05, + "loss": 0.5564, + "step": 13520 + }, + { + "epoch": 0.58, + "learning_rate": 7.129115006412998e-05, + "loss": 0.6064, + "step": 13530 + }, + { + "epoch": 0.58, + "learning_rate": 7.126977340743908e-05, + "loss": 0.5533, + "step": 13540 + }, + { + "epoch": 0.58, + "learning_rate": 7.124839675074819e-05, + "loss": 0.5549, + "step": 13550 + }, + { + "epoch": 0.58, + "learning_rate": 7.122702009405729e-05, + "loss": 0.6233, + "step": 13560 + }, + { + "epoch": 0.58, + "learning_rate": 7.12056434373664e-05, + "loss": 0.5525, + "step": 13570 + }, + { + "epoch": 0.58, + "learning_rate": 7.11842667806755e-05, + "loss": 0.6054, + "step": 13580 + }, + { + "epoch": 0.58, + "learning_rate": 7.116289012398462e-05, + "loss": 0.5663, + "step": 13590 + }, + { + "epoch": 0.58, + "learning_rate": 7.114151346729372e-05, + "loss": 0.5531, + "step": 13600 + }, + { + "epoch": 0.58, + "learning_rate": 7.112013681060283e-05, + "loss": 0.6149, + "step": 13610 + }, + { + "epoch": 0.58, + "learning_rate": 7.109876015391193e-05, + "loss": 0.5489, + "step": 13620 + }, + { + "epoch": 0.58, + "learning_rate": 7.107738349722104e-05, + "loss": 0.6104, + "step": 13630 + }, + { + "epoch": 0.58, + "learning_rate": 7.105600684053014e-05, + "loss": 0.5654, + "step": 13640 + }, + { + "epoch": 0.58, + "learning_rate": 7.103463018383925e-05, + "loss": 0.5569, + "step": 13650 + }, + { + "epoch": 0.58, + "learning_rate": 7.101325352714836e-05, + "loss": 0.6236, + "step": 13660 + }, + { + "epoch": 0.58, + "learning_rate": 7.099187687045747e-05, + "loss": 0.5589, + "step": 13670 + }, + { + "epoch": 0.58, + "learning_rate": 7.097050021376657e-05, + "loss": 0.6043, + "step": 13680 + }, + { + "epoch": 0.58, + "learning_rate": 7.094912355707568e-05, + "loss": 0.5591, + "step": 13690 + }, + { + "epoch": 0.58, + "learning_rate": 7.092774690038478e-05, + "loss": 0.5528, + "step": 13700 + }, + { + "epoch": 0.58, + "learning_rate": 7.090637024369389e-05, + "loss": 0.6183, + "step": 13710 + }, + { + "epoch": 0.59, + "learning_rate": 7.0884993587003e-05, + "loss": 0.5464, + "step": 13720 + }, + { + "epoch": 0.59, + "learning_rate": 7.086361693031211e-05, + "loss": 0.6075, + "step": 13730 + }, + { + "epoch": 0.59, + "learning_rate": 7.084224027362121e-05, + "loss": 0.5572, + "step": 13740 + }, + { + "epoch": 0.59, + "learning_rate": 7.08208636169303e-05, + "loss": 0.5489, + "step": 13750 + }, + { + "epoch": 0.59, + "learning_rate": 7.079948696023942e-05, + "loss": 0.6223, + "step": 13760 + }, + { + "epoch": 0.59, + "learning_rate": 7.077811030354853e-05, + "loss": 0.548, + "step": 13770 + }, + { + "epoch": 0.59, + "learning_rate": 7.075673364685763e-05, + "loss": 0.6048, + "step": 13780 + }, + { + "epoch": 0.59, + "learning_rate": 7.073535699016675e-05, + "loss": 0.5744, + "step": 13790 + }, + { + "epoch": 0.59, + "learning_rate": 7.071398033347585e-05, + "loss": 0.543, + "step": 13800 + }, + { + "epoch": 0.59, + "learning_rate": 7.069260367678495e-05, + "loss": 0.6093, + "step": 13810 + }, + { + "epoch": 0.59, + "learning_rate": 7.067122702009405e-05, + "loss": 0.5512, + "step": 13820 + }, + { + "epoch": 0.59, + "learning_rate": 7.064985036340317e-05, + "loss": 0.6117, + "step": 13830 + }, + { + "epoch": 0.59, + "learning_rate": 7.062847370671227e-05, + "loss": 0.5643, + "step": 13840 + }, + { + "epoch": 0.59, + "learning_rate": 7.060709705002138e-05, + "loss": 0.5591, + "step": 13850 + }, + { + "epoch": 0.59, + "learning_rate": 7.05857203933305e-05, + "loss": 0.6178, + "step": 13860 + }, + { + "epoch": 0.59, + "learning_rate": 7.05643437366396e-05, + "loss": 0.5458, + "step": 13870 + }, + { + "epoch": 0.59, + "learning_rate": 7.054296707994869e-05, + "loss": 0.6113, + "step": 13880 + }, + { + "epoch": 0.59, + "learning_rate": 7.052159042325781e-05, + "loss": 0.5706, + "step": 13890 + }, + { + "epoch": 0.59, + "learning_rate": 7.050021376656691e-05, + "loss": 0.5493, + "step": 13900 + }, + { + "epoch": 0.59, + "learning_rate": 7.047883710987602e-05, + "loss": 0.6264, + "step": 13910 + }, + { + "epoch": 0.59, + "learning_rate": 7.045746045318512e-05, + "loss": 0.5506, + "step": 13920 + }, + { + "epoch": 0.59, + "learning_rate": 7.043608379649424e-05, + "loss": 0.6167, + "step": 13930 + }, + { + "epoch": 0.59, + "learning_rate": 7.041470713980333e-05, + "loss": 0.5699, + "step": 13940 + }, + { + "epoch": 0.6, + "learning_rate": 7.039333048311244e-05, + "loss": 0.5513, + "step": 13950 + }, + { + "epoch": 0.6, + "learning_rate": 7.037195382642155e-05, + "loss": 0.6107, + "step": 13960 + }, + { + "epoch": 0.6, + "learning_rate": 7.035057716973066e-05, + "loss": 0.5505, + "step": 13970 + }, + { + "epoch": 0.6, + "learning_rate": 7.032920051303976e-05, + "loss": 0.6095, + "step": 13980 + }, + { + "epoch": 0.6, + "learning_rate": 7.030782385634888e-05, + "loss": 0.5674, + "step": 13990 + }, + { + "epoch": 0.6, + "learning_rate": 7.028644719965799e-05, + "loss": 0.5424, + "step": 14000 + }, + { + "epoch": 0.6, + "learning_rate": 7.026507054296708e-05, + "loss": 0.6185, + "step": 14010 + }, + { + "epoch": 0.6, + "learning_rate": 7.024369388627618e-05, + "loss": 0.5587, + "step": 14020 + }, + { + "epoch": 0.6, + "learning_rate": 7.02223172295853e-05, + "loss": 0.6128, + "step": 14030 + }, + { + "epoch": 0.6, + "learning_rate": 7.02009405728944e-05, + "loss": 0.567, + "step": 14040 + }, + { + "epoch": 0.6, + "learning_rate": 7.017956391620351e-05, + "loss": 0.555, + "step": 14050 + }, + { + "epoch": 0.6, + "learning_rate": 7.015818725951263e-05, + "loss": 0.6101, + "step": 14060 + }, + { + "epoch": 0.6, + "learning_rate": 7.013681060282172e-05, + "loss": 0.5385, + "step": 14070 + }, + { + "epoch": 0.6, + "learning_rate": 7.011543394613082e-05, + "loss": 0.6093, + "step": 14080 + }, + { + "epoch": 0.6, + "learning_rate": 7.009405728943993e-05, + "loss": 0.569, + "step": 14090 + }, + { + "epoch": 0.6, + "learning_rate": 7.007268063274904e-05, + "loss": 0.5558, + "step": 14100 + }, + { + "epoch": 0.6, + "learning_rate": 7.005130397605815e-05, + "loss": 0.6149, + "step": 14110 + }, + { + "epoch": 0.6, + "learning_rate": 7.002992731936725e-05, + "loss": 0.5541, + "step": 14120 + }, + { + "epoch": 0.6, + "learning_rate": 7.000855066267637e-05, + "loss": 0.6064, + "step": 14130 + }, + { + "epoch": 0.6, + "learning_rate": 6.998717400598546e-05, + "loss": 0.5779, + "step": 14140 + }, + { + "epoch": 0.6, + "learning_rate": 6.996579734929457e-05, + "loss": 0.5517, + "step": 14150 + }, + { + "epoch": 0.6, + "learning_rate": 6.994442069260368e-05, + "loss": 0.6183, + "step": 14160 + }, + { + "epoch": 0.6, + "learning_rate": 6.992304403591279e-05, + "loss": 0.555, + "step": 14170 + }, + { + "epoch": 0.6, + "learning_rate": 6.99016673792219e-05, + "loss": 0.6076, + "step": 14180 + }, + { + "epoch": 0.61, + "learning_rate": 6.9880290722531e-05, + "loss": 0.5624, + "step": 14190 + }, + { + "epoch": 0.61, + "learning_rate": 6.98589140658401e-05, + "loss": 0.5534, + "step": 14200 + }, + { + "epoch": 0.61, + "learning_rate": 6.983753740914921e-05, + "loss": 0.6184, + "step": 14210 + }, + { + "epoch": 0.61, + "learning_rate": 6.981616075245831e-05, + "loss": 0.5568, + "step": 14220 + }, + { + "epoch": 0.61, + "learning_rate": 6.979478409576743e-05, + "loss": 0.6007, + "step": 14230 + }, + { + "epoch": 0.61, + "learning_rate": 6.977340743907653e-05, + "loss": 0.5623, + "step": 14240 + }, + { + "epoch": 0.61, + "learning_rate": 6.975203078238564e-05, + "loss": 0.5427, + "step": 14250 + }, + { + "epoch": 0.61, + "learning_rate": 6.973065412569474e-05, + "loss": 0.6178, + "step": 14260 + }, + { + "epoch": 0.61, + "learning_rate": 6.970927746900385e-05, + "loss": 0.5477, + "step": 14270 + }, + { + "epoch": 0.61, + "learning_rate": 6.968790081231295e-05, + "loss": 0.6166, + "step": 14280 + }, + { + "epoch": 0.61, + "learning_rate": 6.966652415562206e-05, + "loss": 0.5634, + "step": 14290 + }, + { + "epoch": 0.61, + "learning_rate": 6.964514749893117e-05, + "loss": 0.5478, + "step": 14300 + }, + { + "epoch": 0.61, + "learning_rate": 6.962377084224028e-05, + "loss": 0.616, + "step": 14310 + }, + { + "epoch": 0.61, + "learning_rate": 6.960239418554938e-05, + "loss": 0.5468, + "step": 14320 + }, + { + "epoch": 0.61, + "learning_rate": 6.958101752885849e-05, + "loss": 0.6115, + "step": 14330 + }, + { + "epoch": 0.61, + "learning_rate": 6.955964087216759e-05, + "loss": 0.5634, + "step": 14340 + }, + { + "epoch": 0.61, + "learning_rate": 6.95382642154767e-05, + "loss": 0.5445, + "step": 14350 + }, + { + "epoch": 0.61, + "learning_rate": 6.95168875587858e-05, + "loss": 0.6135, + "step": 14360 + }, + { + "epoch": 0.61, + "learning_rate": 6.949551090209492e-05, + "loss": 0.5562, + "step": 14370 + }, + { + "epoch": 0.61, + "learning_rate": 6.947413424540402e-05, + "loss": 0.6056, + "step": 14380 + }, + { + "epoch": 0.61, + "learning_rate": 6.945275758871313e-05, + "loss": 0.5616, + "step": 14390 + }, + { + "epoch": 0.61, + "learning_rate": 6.943138093202223e-05, + "loss": 0.5483, + "step": 14400 + }, + { + "epoch": 0.61, + "learning_rate": 6.941000427533134e-05, + "loss": 0.6095, + "step": 14410 + }, + { + "epoch": 0.62, + "learning_rate": 6.938862761864044e-05, + "loss": 0.5591, + "step": 14420 + }, + { + "epoch": 0.62, + "learning_rate": 6.936725096194956e-05, + "loss": 0.6013, + "step": 14430 + }, + { + "epoch": 0.62, + "learning_rate": 6.934587430525867e-05, + "loss": 0.563, + "step": 14440 + }, + { + "epoch": 0.62, + "learning_rate": 6.932449764856777e-05, + "loss": 0.5485, + "step": 14450 + }, + { + "epoch": 0.62, + "learning_rate": 6.930312099187687e-05, + "loss": 0.6175, + "step": 14460 + }, + { + "epoch": 0.62, + "learning_rate": 6.928174433518598e-05, + "loss": 0.5431, + "step": 14470 + }, + { + "epoch": 0.62, + "learning_rate": 6.926036767849508e-05, + "loss": 0.608, + "step": 14480 + }, + { + "epoch": 0.62, + "learning_rate": 6.923899102180419e-05, + "loss": 0.5576, + "step": 14490 + }, + { + "epoch": 0.62, + "learning_rate": 6.92176143651133e-05, + "loss": 0.5476, + "step": 14500 + }, + { + "epoch": 0.62, + "learning_rate": 6.919623770842241e-05, + "loss": 0.6157, + "step": 14510 + }, + { + "epoch": 0.62, + "learning_rate": 6.917486105173151e-05, + "loss": 0.5514, + "step": 14520 + }, + { + "epoch": 0.62, + "learning_rate": 6.915348439504062e-05, + "loss": 0.6143, + "step": 14530 + }, + { + "epoch": 0.62, + "learning_rate": 6.913210773834972e-05, + "loss": 0.5628, + "step": 14540 + }, + { + "epoch": 0.62, + "learning_rate": 6.911073108165883e-05, + "loss": 0.5491, + "step": 14550 + }, + { + "epoch": 0.62, + "learning_rate": 6.908935442496793e-05, + "loss": 0.6153, + "step": 14560 + }, + { + "epoch": 0.62, + "learning_rate": 6.906797776827705e-05, + "loss": 0.5478, + "step": 14570 + }, + { + "epoch": 0.62, + "learning_rate": 6.904660111158616e-05, + "loss": 0.615, + "step": 14580 + }, + { + "epoch": 0.62, + "learning_rate": 6.902522445489526e-05, + "loss": 0.5627, + "step": 14590 + }, + { + "epoch": 0.62, + "learning_rate": 6.900384779820436e-05, + "loss": 0.5443, + "step": 14600 + }, + { + "epoch": 0.62, + "learning_rate": 6.898247114151347e-05, + "loss": 0.6171, + "step": 14610 + }, + { + "epoch": 0.62, + "learning_rate": 6.896109448482257e-05, + "loss": 0.55, + "step": 14620 + }, + { + "epoch": 0.62, + "learning_rate": 6.893971782813168e-05, + "loss": 0.6092, + "step": 14630 + }, + { + "epoch": 0.62, + "learning_rate": 6.89183411714408e-05, + "loss": 0.5593, + "step": 14640 + }, + { + "epoch": 0.62, + "learning_rate": 6.88969645147499e-05, + "loss": 0.5364, + "step": 14650 + }, + { + "epoch": 0.63, + "learning_rate": 6.8875587858059e-05, + "loss": 0.6095, + "step": 14660 + }, + { + "epoch": 0.63, + "learning_rate": 6.885421120136811e-05, + "loss": 0.5511, + "step": 14670 + }, + { + "epoch": 0.63, + "learning_rate": 6.883283454467721e-05, + "loss": 0.6058, + "step": 14680 + }, + { + "epoch": 0.63, + "learning_rate": 6.881145788798632e-05, + "loss": 0.5652, + "step": 14690 + }, + { + "epoch": 0.63, + "learning_rate": 6.879008123129544e-05, + "loss": 0.5472, + "step": 14700 + }, + { + "epoch": 0.63, + "learning_rate": 6.876870457460454e-05, + "loss": 0.6106, + "step": 14710 + }, + { + "epoch": 0.63, + "learning_rate": 6.874732791791365e-05, + "loss": 0.5514, + "step": 14720 + }, + { + "epoch": 0.63, + "learning_rate": 6.872595126122274e-05, + "loss": 0.6062, + "step": 14730 + }, + { + "epoch": 0.63, + "learning_rate": 6.870457460453185e-05, + "loss": 0.5597, + "step": 14740 + }, + { + "epoch": 0.63, + "learning_rate": 6.868319794784096e-05, + "loss": 0.5524, + "step": 14750 + }, + { + "epoch": 0.63, + "learning_rate": 6.866182129115006e-05, + "loss": 0.6109, + "step": 14760 + }, + { + "epoch": 0.63, + "learning_rate": 6.864044463445918e-05, + "loss": 0.5465, + "step": 14770 + }, + { + "epoch": 0.63, + "learning_rate": 6.861906797776829e-05, + "loss": 0.6039, + "step": 14780 + }, + { + "epoch": 0.63, + "learning_rate": 6.859769132107739e-05, + "loss": 0.5574, + "step": 14790 + }, + { + "epoch": 0.63, + "learning_rate": 6.85763146643865e-05, + "loss": 0.5574, + "step": 14800 + }, + { + "epoch": 0.63, + "learning_rate": 6.85549380076956e-05, + "loss": 0.6085, + "step": 14810 + }, + { + "epoch": 0.63, + "learning_rate": 6.85335613510047e-05, + "loss": 0.5468, + "step": 14820 + }, + { + "epoch": 0.63, + "learning_rate": 6.851218469431381e-05, + "loss": 0.6182, + "step": 14830 + }, + { + "epoch": 0.63, + "learning_rate": 6.849080803762293e-05, + "loss": 0.5614, + "step": 14840 + }, + { + "epoch": 0.63, + "learning_rate": 6.846943138093203e-05, + "loss": 0.5475, + "step": 14850 + }, + { + "epoch": 0.63, + "learning_rate": 6.844805472424112e-05, + "loss": 0.6165, + "step": 14860 + }, + { + "epoch": 0.63, + "learning_rate": 6.842667806755024e-05, + "loss": 0.5572, + "step": 14870 + }, + { + "epoch": 0.63, + "learning_rate": 6.840530141085934e-05, + "loss": 0.6132, + "step": 14880 + }, + { + "epoch": 0.64, + "learning_rate": 6.838392475416845e-05, + "loss": 0.5622, + "step": 14890 + }, + { + "epoch": 0.64, + "learning_rate": 6.836254809747755e-05, + "loss": 0.5423, + "step": 14900 + }, + { + "epoch": 0.64, + "learning_rate": 6.834117144078667e-05, + "loss": 0.6082, + "step": 14910 + }, + { + "epoch": 0.64, + "learning_rate": 6.831979478409578e-05, + "loss": 0.5545, + "step": 14920 + }, + { + "epoch": 0.64, + "learning_rate": 6.829841812740487e-05, + "loss": 0.6018, + "step": 14930 + }, + { + "epoch": 0.64, + "learning_rate": 6.827704147071399e-05, + "loss": 0.5695, + "step": 14940 + }, + { + "epoch": 0.64, + "learning_rate": 6.825566481402309e-05, + "loss": 0.5433, + "step": 14950 + }, + { + "epoch": 0.64, + "learning_rate": 6.82342881573322e-05, + "loss": 0.6234, + "step": 14960 + }, + { + "epoch": 0.64, + "learning_rate": 6.821291150064131e-05, + "loss": 0.5469, + "step": 14970 + }, + { + "epoch": 0.64, + "learning_rate": 6.819153484395042e-05, + "loss": 0.599, + "step": 14980 + }, + { + "epoch": 0.64, + "learning_rate": 6.817015818725951e-05, + "loss": 0.556, + "step": 14990 + }, + { + "epoch": 0.64, + "learning_rate": 6.814878153056861e-05, + "loss": 0.5496, + "step": 15000 + }, + { + "epoch": 0.64, + "learning_rate": 6.812740487387773e-05, + "loss": 0.6209, + "step": 15010 + }, + { + "epoch": 0.64, + "learning_rate": 6.810602821718684e-05, + "loss": 0.5453, + "step": 15020 + }, + { + "epoch": 0.64, + "learning_rate": 6.808465156049594e-05, + "loss": 0.6127, + "step": 15030 + }, + { + "epoch": 0.64, + "learning_rate": 6.806327490380506e-05, + "loss": 0.5519, + "step": 15040 + }, + { + "epoch": 0.64, + "learning_rate": 6.804189824711415e-05, + "loss": 0.5505, + "step": 15050 + }, + { + "epoch": 0.64, + "learning_rate": 6.802052159042325e-05, + "loss": 0.6182, + "step": 15060 + }, + { + "epoch": 0.64, + "learning_rate": 6.799914493373237e-05, + "loss": 0.5555, + "step": 15070 + }, + { + "epoch": 0.64, + "learning_rate": 6.797776827704148e-05, + "loss": 0.6014, + "step": 15080 + }, + { + "epoch": 0.64, + "learning_rate": 6.795639162035058e-05, + "loss": 0.566, + "step": 15090 + }, + { + "epoch": 0.64, + "learning_rate": 6.793501496365968e-05, + "loss": 0.5498, + "step": 15100 + }, + { + "epoch": 0.64, + "learning_rate": 6.79136383069688e-05, + "loss": 0.6135, + "step": 15110 + }, + { + "epoch": 0.65, + "learning_rate": 6.78922616502779e-05, + "loss": 0.5434, + "step": 15120 + }, + { + "epoch": 0.65, + "learning_rate": 6.7870884993587e-05, + "loss": 0.5988, + "step": 15130 + }, + { + "epoch": 0.65, + "learning_rate": 6.784950833689612e-05, + "loss": 0.564, + "step": 15140 + }, + { + "epoch": 0.65, + "learning_rate": 6.782813168020522e-05, + "loss": 0.5464, + "step": 15150 + }, + { + "epoch": 0.65, + "learning_rate": 6.780675502351433e-05, + "loss": 0.612, + "step": 15160 + }, + { + "epoch": 0.65, + "learning_rate": 6.778537836682343e-05, + "loss": 0.5407, + "step": 15170 + }, + { + "epoch": 0.65, + "learning_rate": 6.776400171013253e-05, + "loss": 0.608, + "step": 15180 + }, + { + "epoch": 0.65, + "learning_rate": 6.774262505344164e-05, + "loss": 0.5674, + "step": 15190 + }, + { + "epoch": 0.65, + "learning_rate": 6.772124839675074e-05, + "loss": 0.5369, + "step": 15200 + }, + { + "epoch": 0.65, + "learning_rate": 6.769987174005986e-05, + "loss": 0.6188, + "step": 15210 + }, + { + "epoch": 0.65, + "learning_rate": 6.767849508336897e-05, + "loss": 0.5516, + "step": 15220 + }, + { + "epoch": 0.65, + "learning_rate": 6.765711842667807e-05, + "loss": 0.6131, + "step": 15230 + }, + { + "epoch": 0.65, + "learning_rate": 6.763574176998719e-05, + "loss": 0.5624, + "step": 15240 + }, + { + "epoch": 0.65, + "learning_rate": 6.761436511329628e-05, + "loss": 0.5444, + "step": 15250 + }, + { + "epoch": 0.65, + "learning_rate": 6.759298845660538e-05, + "loss": 0.6128, + "step": 15260 + }, + { + "epoch": 0.65, + "learning_rate": 6.757161179991449e-05, + "loss": 0.5486, + "step": 15270 + }, + { + "epoch": 0.65, + "learning_rate": 6.75502351432236e-05, + "loss": 0.5926, + "step": 15280 + }, + { + "epoch": 0.65, + "learning_rate": 6.752885848653271e-05, + "loss": 0.5479, + "step": 15290 + }, + { + "epoch": 0.65, + "learning_rate": 6.750748182984182e-05, + "loss": 0.542, + "step": 15300 + }, + { + "epoch": 0.65, + "learning_rate": 6.748610517315092e-05, + "loss": 0.6213, + "step": 15310 + }, + { + "epoch": 0.65, + "learning_rate": 6.746472851646002e-05, + "loss": 0.5526, + "step": 15320 + }, + { + "epoch": 0.65, + "learning_rate": 6.744335185976913e-05, + "loss": 0.607, + "step": 15330 + }, + { + "epoch": 0.65, + "learning_rate": 6.742197520307825e-05, + "loss": 0.5571, + "step": 15340 + }, + { + "epoch": 0.65, + "learning_rate": 6.740059854638735e-05, + "loss": 0.5377, + "step": 15350 + }, + { + "epoch": 0.66, + "learning_rate": 6.737922188969646e-05, + "loss": 0.6241, + "step": 15360 + }, + { + "epoch": 0.66, + "learning_rate": 6.735784523300556e-05, + "loss": 0.5449, + "step": 15370 + }, + { + "epoch": 0.66, + "learning_rate": 6.733646857631467e-05, + "loss": 0.6074, + "step": 15380 + }, + { + "epoch": 0.66, + "learning_rate": 6.731509191962377e-05, + "loss": 0.5726, + "step": 15390 + }, + { + "epoch": 0.66, + "learning_rate": 6.729371526293287e-05, + "loss": 0.5424, + "step": 15400 + }, + { + "epoch": 0.66, + "learning_rate": 6.727233860624199e-05, + "loss": 0.6141, + "step": 15410 + }, + { + "epoch": 0.66, + "learning_rate": 6.72509619495511e-05, + "loss": 0.5557, + "step": 15420 + }, + { + "epoch": 0.66, + "learning_rate": 6.72295852928602e-05, + "loss": 0.598, + "step": 15430 + }, + { + "epoch": 0.66, + "learning_rate": 6.72082086361693e-05, + "loss": 0.5532, + "step": 15440 + }, + { + "epoch": 0.66, + "learning_rate": 6.718683197947841e-05, + "loss": 0.5503, + "step": 15450 + }, + { + "epoch": 0.66, + "learning_rate": 6.716545532278751e-05, + "loss": 0.6274, + "step": 15460 + }, + { + "epoch": 0.66, + "learning_rate": 6.714407866609662e-05, + "loss": 0.5524, + "step": 15470 + }, + { + "epoch": 0.66, + "learning_rate": 6.712270200940574e-05, + "loss": 0.6094, + "step": 15480 + }, + { + "epoch": 0.66, + "learning_rate": 6.710132535271484e-05, + "loss": 0.5622, + "step": 15490 + }, + { + "epoch": 0.66, + "learning_rate": 6.707994869602395e-05, + "loss": 0.5431, + "step": 15500 + }, + { + "epoch": 0.66, + "learning_rate": 6.705857203933305e-05, + "loss": 0.6123, + "step": 15510 + }, + { + "epoch": 0.66, + "learning_rate": 6.703719538264216e-05, + "loss": 0.5468, + "step": 15520 + }, + { + "epoch": 0.66, + "learning_rate": 6.701581872595126e-05, + "loss": 0.612, + "step": 15530 + }, + { + "epoch": 0.66, + "learning_rate": 6.699444206926036e-05, + "loss": 0.5655, + "step": 15540 + }, + { + "epoch": 0.66, + "learning_rate": 6.697306541256948e-05, + "loss": 0.5501, + "step": 15550 + }, + { + "epoch": 0.66, + "learning_rate": 6.695168875587859e-05, + "loss": 0.6009, + "step": 15560 + }, + { + "epoch": 0.66, + "learning_rate": 6.693031209918769e-05, + "loss": 0.5442, + "step": 15570 + }, + { + "epoch": 0.66, + "learning_rate": 6.69089354424968e-05, + "loss": 0.5972, + "step": 15580 + }, + { + "epoch": 0.67, + "learning_rate": 6.68875587858059e-05, + "loss": 0.5633, + "step": 15590 + }, + { + "epoch": 0.67, + "learning_rate": 6.6866182129115e-05, + "loss": 0.5434, + "step": 15600 + }, + { + "epoch": 0.67, + "learning_rate": 6.684480547242412e-05, + "loss": 0.6107, + "step": 15610 + }, + { + "epoch": 0.67, + "learning_rate": 6.682342881573323e-05, + "loss": 0.5438, + "step": 15620 + }, + { + "epoch": 0.67, + "learning_rate": 6.680205215904233e-05, + "loss": 0.6036, + "step": 15630 + }, + { + "epoch": 0.67, + "learning_rate": 6.678067550235144e-05, + "loss": 0.5584, + "step": 15640 + }, + { + "epoch": 0.67, + "learning_rate": 6.675929884566054e-05, + "loss": 0.5496, + "step": 15650 + }, + { + "epoch": 0.67, + "learning_rate": 6.673792218896965e-05, + "loss": 0.6098, + "step": 15660 + }, + { + "epoch": 0.67, + "learning_rate": 6.671654553227875e-05, + "loss": 0.559, + "step": 15670 + }, + { + "epoch": 0.67, + "learning_rate": 6.669516887558787e-05, + "loss": 0.6044, + "step": 15680 + }, + { + "epoch": 0.67, + "learning_rate": 6.667379221889697e-05, + "loss": 0.5554, + "step": 15690 + }, + { + "epoch": 0.67, + "learning_rate": 6.665241556220608e-05, + "loss": 0.5469, + "step": 15700 + }, + { + "epoch": 0.67, + "learning_rate": 6.663103890551518e-05, + "loss": 0.6054, + "step": 15710 + }, + { + "epoch": 0.67, + "learning_rate": 6.660966224882429e-05, + "loss": 0.5529, + "step": 15720 + }, + { + "epoch": 0.67, + "learning_rate": 6.658828559213339e-05, + "loss": 0.6096, + "step": 15730 + }, + { + "epoch": 0.67, + "learning_rate": 6.65669089354425e-05, + "loss": 0.5618, + "step": 15740 + }, + { + "epoch": 0.67, + "learning_rate": 6.654553227875161e-05, + "loss": 0.5471, + "step": 15750 + }, + { + "epoch": 0.67, + "learning_rate": 6.652415562206072e-05, + "loss": 0.6238, + "step": 15760 + }, + { + "epoch": 0.67, + "learning_rate": 6.650277896536982e-05, + "loss": 0.5481, + "step": 15770 + }, + { + "epoch": 0.67, + "learning_rate": 6.648140230867893e-05, + "loss": 0.603, + "step": 15780 + }, + { + "epoch": 0.67, + "learning_rate": 6.646002565198803e-05, + "loss": 0.5574, + "step": 15790 + }, + { + "epoch": 0.67, + "learning_rate": 6.643864899529714e-05, + "loss": 0.5511, + "step": 15800 + }, + { + "epoch": 0.67, + "learning_rate": 6.641727233860624e-05, + "loss": 0.6084, + "step": 15810 + }, + { + "epoch": 0.67, + "learning_rate": 6.639589568191536e-05, + "loss": 0.5473, + "step": 15820 + }, + { + "epoch": 0.68, + "learning_rate": 6.637451902522446e-05, + "loss": 0.6085, + "step": 15830 + }, + { + "epoch": 0.68, + "learning_rate": 6.635314236853355e-05, + "loss": 0.5656, + "step": 15840 + }, + { + "epoch": 0.68, + "learning_rate": 6.633176571184267e-05, + "loss": 0.5479, + "step": 15850 + }, + { + "epoch": 0.68, + "learning_rate": 6.631038905515178e-05, + "loss": 0.5982, + "step": 15860 + }, + { + "epoch": 0.68, + "learning_rate": 6.628901239846088e-05, + "loss": 0.5513, + "step": 15870 + }, + { + "epoch": 0.68, + "learning_rate": 6.626763574177e-05, + "loss": 0.5952, + "step": 15880 + }, + { + "epoch": 0.68, + "learning_rate": 6.62462590850791e-05, + "loss": 0.5609, + "step": 15890 + }, + { + "epoch": 0.68, + "learning_rate": 6.622488242838821e-05, + "loss": 0.5543, + "step": 15900 + }, + { + "epoch": 0.68, + "learning_rate": 6.62035057716973e-05, + "loss": 0.611, + "step": 15910 + }, + { + "epoch": 0.68, + "learning_rate": 6.618212911500642e-05, + "loss": 0.556, + "step": 15920 + }, + { + "epoch": 0.68, + "learning_rate": 6.616075245831552e-05, + "loss": 0.6082, + "step": 15930 + }, + { + "epoch": 0.68, + "learning_rate": 6.613937580162463e-05, + "loss": 0.5591, + "step": 15940 + }, + { + "epoch": 0.68, + "learning_rate": 6.611799914493374e-05, + "loss": 0.5439, + "step": 15950 + }, + { + "epoch": 0.68, + "learning_rate": 6.609662248824285e-05, + "loss": 0.6095, + "step": 15960 + }, + { + "epoch": 0.68, + "learning_rate": 6.607524583155194e-05, + "loss": 0.5463, + "step": 15970 + }, + { + "epoch": 0.68, + "learning_rate": 6.605386917486104e-05, + "loss": 0.6001, + "step": 15980 + }, + { + "epoch": 0.68, + "learning_rate": 6.603249251817016e-05, + "loss": 0.567, + "step": 15990 + }, + { + "epoch": 0.68, + "learning_rate": 6.601111586147927e-05, + "loss": 0.5419, + "step": 16000 + }, + { + "epoch": 0.68, + "learning_rate": 6.598973920478837e-05, + "loss": 0.6156, + "step": 16010 + }, + { + "epoch": 0.68, + "learning_rate": 6.596836254809749e-05, + "loss": 0.5433, + "step": 16020 + }, + { + "epoch": 0.68, + "learning_rate": 6.59469858914066e-05, + "loss": 0.5996, + "step": 16030 + }, + { + "epoch": 0.68, + "learning_rate": 6.592560923471568e-05, + "loss": 0.5492, + "step": 16040 + }, + { + "epoch": 0.68, + "learning_rate": 6.59042325780248e-05, + "loss": 0.5573, + "step": 16050 + }, + { + "epoch": 0.69, + "learning_rate": 6.588285592133391e-05, + "loss": 0.6128, + "step": 16060 + }, + { + "epoch": 0.69, + "learning_rate": 6.586147926464301e-05, + "loss": 0.5459, + "step": 16070 + }, + { + "epoch": 0.69, + "learning_rate": 6.584010260795212e-05, + "loss": 0.5999, + "step": 16080 + }, + { + "epoch": 0.69, + "learning_rate": 6.581872595126123e-05, + "loss": 0.5648, + "step": 16090 + }, + { + "epoch": 0.69, + "learning_rate": 6.579734929457033e-05, + "loss": 0.5447, + "step": 16100 + }, + { + "epoch": 0.69, + "learning_rate": 6.577597263787943e-05, + "loss": 0.6173, + "step": 16110 + }, + { + "epoch": 0.69, + "learning_rate": 6.575459598118855e-05, + "loss": 0.5517, + "step": 16120 + }, + { + "epoch": 0.69, + "learning_rate": 6.573321932449765e-05, + "loss": 0.5995, + "step": 16130 + }, + { + "epoch": 0.69, + "learning_rate": 6.571184266780676e-05, + "loss": 0.5584, + "step": 16140 + }, + { + "epoch": 0.69, + "learning_rate": 6.569046601111588e-05, + "loss": 0.5453, + "step": 16150 + }, + { + "epoch": 0.69, + "learning_rate": 6.566908935442498e-05, + "loss": 0.6224, + "step": 16160 + }, + { + "epoch": 0.69, + "learning_rate": 6.564771269773407e-05, + "loss": 0.5403, + "step": 16170 + }, + { + "epoch": 0.69, + "learning_rate": 6.562633604104318e-05, + "loss": 0.6052, + "step": 16180 + }, + { + "epoch": 0.69, + "learning_rate": 6.56049593843523e-05, + "loss": 0.5669, + "step": 16190 + }, + { + "epoch": 0.69, + "learning_rate": 6.55835827276614e-05, + "loss": 0.5475, + "step": 16200 + }, + { + "epoch": 0.69, + "learning_rate": 6.55622060709705e-05, + "loss": 0.5999, + "step": 16210 + }, + { + "epoch": 0.69, + "learning_rate": 6.554082941427962e-05, + "loss": 0.5393, + "step": 16220 + }, + { + "epoch": 0.69, + "learning_rate": 6.551945275758871e-05, + "loss": 0.6079, + "step": 16230 + }, + { + "epoch": 0.69, + "learning_rate": 6.549807610089782e-05, + "loss": 0.5649, + "step": 16240 + }, + { + "epoch": 0.69, + "learning_rate": 6.547669944420692e-05, + "loss": 0.5443, + "step": 16250 + }, + { + "epoch": 0.69, + "learning_rate": 6.545532278751604e-05, + "loss": 0.6134, + "step": 16260 + }, + { + "epoch": 0.69, + "learning_rate": 6.543394613082514e-05, + "loss": 0.5366, + "step": 16270 + }, + { + "epoch": 0.69, + "learning_rate": 6.541256947413425e-05, + "loss": 0.6047, + "step": 16280 + }, + { + "epoch": 0.69, + "learning_rate": 6.539119281744335e-05, + "loss": 0.5592, + "step": 16290 + }, + { + "epoch": 0.7, + "learning_rate": 6.536981616075246e-05, + "loss": 0.5327, + "step": 16300 + }, + { + "epoch": 0.7, + "learning_rate": 6.534843950406156e-05, + "loss": 0.6025, + "step": 16310 + }, + { + "epoch": 0.7, + "learning_rate": 6.532706284737068e-05, + "loss": 0.5395, + "step": 16320 + }, + { + "epoch": 0.7, + "learning_rate": 6.530568619067978e-05, + "loss": 0.605, + "step": 16330 + }, + { + "epoch": 0.7, + "learning_rate": 6.528430953398889e-05, + "loss": 0.568, + "step": 16340 + }, + { + "epoch": 0.7, + "learning_rate": 6.526293287729799e-05, + "loss": 0.5483, + "step": 16350 + }, + { + "epoch": 0.7, + "learning_rate": 6.52415562206071e-05, + "loss": 0.6058, + "step": 16360 + }, + { + "epoch": 0.7, + "learning_rate": 6.52201795639162e-05, + "loss": 0.5525, + "step": 16370 + }, + { + "epoch": 0.7, + "learning_rate": 6.51988029072253e-05, + "loss": 0.6069, + "step": 16380 + }, + { + "epoch": 0.7, + "learning_rate": 6.517742625053442e-05, + "loss": 0.5755, + "step": 16390 + }, + { + "epoch": 0.7, + "learning_rate": 6.515604959384353e-05, + "loss": 0.5432, + "step": 16400 + }, + { + "epoch": 0.7, + "learning_rate": 6.513467293715263e-05, + "loss": 0.6073, + "step": 16410 + }, + { + "epoch": 0.7, + "learning_rate": 6.511329628046174e-05, + "loss": 0.5423, + "step": 16420 + }, + { + "epoch": 0.7, + "learning_rate": 6.509191962377084e-05, + "loss": 0.6055, + "step": 16430 + }, + { + "epoch": 0.7, + "learning_rate": 6.507054296707995e-05, + "loss": 0.5449, + "step": 16440 + }, + { + "epoch": 0.7, + "learning_rate": 6.504916631038905e-05, + "loss": 0.5477, + "step": 16450 + }, + { + "epoch": 0.7, + "learning_rate": 6.502778965369817e-05, + "loss": 0.6013, + "step": 16460 + }, + { + "epoch": 0.7, + "learning_rate": 6.500641299700727e-05, + "loss": 0.5393, + "step": 16470 + }, + { + "epoch": 0.7, + "learning_rate": 6.498503634031638e-05, + "loss": 0.6078, + "step": 16480 + }, + { + "epoch": 0.7, + "learning_rate": 6.496365968362548e-05, + "loss": 0.5569, + "step": 16490 + }, + { + "epoch": 0.7, + "learning_rate": 6.494228302693459e-05, + "loss": 0.5439, + "step": 16500 + }, + { + "epoch": 0.7, + "learning_rate": 6.492090637024369e-05, + "loss": 0.6167, + "step": 16510 + }, + { + "epoch": 0.7, + "learning_rate": 6.48995297135528e-05, + "loss": 0.546, + "step": 16520 + }, + { + "epoch": 0.71, + "learning_rate": 6.487815305686191e-05, + "loss": 0.6, + "step": 16530 + }, + { + "epoch": 0.71, + "learning_rate": 6.485677640017102e-05, + "loss": 0.5609, + "step": 16540 + }, + { + "epoch": 0.71, + "learning_rate": 6.483539974348012e-05, + "loss": 0.5456, + "step": 16550 + }, + { + "epoch": 0.71, + "learning_rate": 6.481402308678923e-05, + "loss": 0.6172, + "step": 16560 + }, + { + "epoch": 0.71, + "learning_rate": 6.479264643009833e-05, + "loss": 0.5521, + "step": 16570 + }, + { + "epoch": 0.71, + "learning_rate": 6.477126977340744e-05, + "loss": 0.6042, + "step": 16580 + }, + { + "epoch": 0.71, + "learning_rate": 6.474989311671656e-05, + "loss": 0.5636, + "step": 16590 + }, + { + "epoch": 0.71, + "learning_rate": 6.472851646002566e-05, + "loss": 0.5445, + "step": 16600 + }, + { + "epoch": 0.71, + "learning_rate": 6.470713980333476e-05, + "loss": 0.6134, + "step": 16610 + }, + { + "epoch": 0.71, + "learning_rate": 6.468576314664387e-05, + "loss": 0.5471, + "step": 16620 + }, + { + "epoch": 0.71, + "learning_rate": 6.466438648995297e-05, + "loss": 0.5964, + "step": 16630 + }, + { + "epoch": 0.71, + "learning_rate": 6.464300983326208e-05, + "loss": 0.5617, + "step": 16640 + }, + { + "epoch": 0.71, + "learning_rate": 6.462163317657118e-05, + "loss": 0.5519, + "step": 16650 + }, + { + "epoch": 0.71, + "learning_rate": 6.46002565198803e-05, + "loss": 0.6119, + "step": 16660 + }, + { + "epoch": 0.71, + "learning_rate": 6.45788798631894e-05, + "loss": 0.5514, + "step": 16670 + }, + { + "epoch": 0.71, + "learning_rate": 6.455750320649851e-05, + "loss": 0.6003, + "step": 16680 + }, + { + "epoch": 0.71, + "learning_rate": 6.453612654980761e-05, + "loss": 0.5506, + "step": 16690 + }, + { + "epoch": 0.71, + "learning_rate": 6.451474989311672e-05, + "loss": 0.5413, + "step": 16700 + }, + { + "epoch": 0.71, + "learning_rate": 6.449337323642582e-05, + "loss": 0.6038, + "step": 16710 + }, + { + "epoch": 0.71, + "learning_rate": 6.447199657973493e-05, + "loss": 0.5328, + "step": 16720 + }, + { + "epoch": 0.71, + "learning_rate": 6.445061992304405e-05, + "loss": 0.6035, + "step": 16730 + }, + { + "epoch": 0.71, + "learning_rate": 6.442924326635315e-05, + "loss": 0.5497, + "step": 16740 + }, + { + "epoch": 0.71, + "learning_rate": 6.440786660966225e-05, + "loss": 0.5468, + "step": 16750 + }, + { + "epoch": 0.72, + "learning_rate": 6.438648995297136e-05, + "loss": 0.6047, + "step": 16760 + }, + { + "epoch": 0.72, + "learning_rate": 6.436511329628046e-05, + "loss": 0.5367, + "step": 16770 + }, + { + "epoch": 0.72, + "learning_rate": 6.434373663958957e-05, + "loss": 0.6069, + "step": 16780 + }, + { + "epoch": 0.72, + "learning_rate": 6.432235998289867e-05, + "loss": 0.55, + "step": 16790 + }, + { + "epoch": 0.72, + "learning_rate": 6.430098332620779e-05, + "loss": 0.5481, + "step": 16800 + }, + { + "epoch": 0.72, + "learning_rate": 6.42796066695169e-05, + "loss": 0.6097, + "step": 16810 + }, + { + "epoch": 0.72, + "learning_rate": 6.4258230012826e-05, + "loss": 0.5391, + "step": 16820 + }, + { + "epoch": 0.72, + "learning_rate": 6.42368533561351e-05, + "loss": 0.5986, + "step": 16830 + }, + { + "epoch": 0.72, + "learning_rate": 6.421547669944421e-05, + "loss": 0.5548, + "step": 16840 + }, + { + "epoch": 0.72, + "learning_rate": 6.419410004275331e-05, + "loss": 0.5509, + "step": 16850 + }, + { + "epoch": 0.72, + "learning_rate": 6.417272338606243e-05, + "loss": 0.6078, + "step": 16860 + }, + { + "epoch": 0.72, + "learning_rate": 6.415134672937154e-05, + "loss": 0.5482, + "step": 16870 + }, + { + "epoch": 0.72, + "learning_rate": 6.412997007268064e-05, + "loss": 0.5988, + "step": 16880 + }, + { + "epoch": 0.72, + "learning_rate": 6.410859341598973e-05, + "loss": 0.5504, + "step": 16890 + }, + { + "epoch": 0.72, + "learning_rate": 6.408721675929885e-05, + "loss": 0.5439, + "step": 16900 + }, + { + "epoch": 0.72, + "learning_rate": 6.406584010260795e-05, + "loss": 0.6126, + "step": 16910 + }, + { + "epoch": 0.72, + "learning_rate": 6.404446344591706e-05, + "loss": 0.5525, + "step": 16920 + }, + { + "epoch": 0.72, + "learning_rate": 6.402308678922618e-05, + "loss": 0.6029, + "step": 16930 + }, + { + "epoch": 0.72, + "learning_rate": 6.400171013253528e-05, + "loss": 0.5571, + "step": 16940 + }, + { + "epoch": 0.72, + "learning_rate": 6.398033347584439e-05, + "loss": 0.5438, + "step": 16950 + }, + { + "epoch": 0.72, + "learning_rate": 6.395895681915349e-05, + "loss": 0.6036, + "step": 16960 + }, + { + "epoch": 0.72, + "learning_rate": 6.39375801624626e-05, + "loss": 0.5452, + "step": 16970 + }, + { + "epoch": 0.72, + "learning_rate": 6.39162035057717e-05, + "loss": 0.5876, + "step": 16980 + }, + { + "epoch": 0.72, + "learning_rate": 6.38948268490808e-05, + "loss": 0.5511, + "step": 16990 + }, + { + "epoch": 0.73, + "learning_rate": 6.387345019238992e-05, + "loss": 0.5408, + "step": 17000 + }, + { + "epoch": 0.73, + "learning_rate": 6.385207353569903e-05, + "loss": 0.6054, + "step": 17010 + }, + { + "epoch": 0.73, + "learning_rate": 6.383069687900812e-05, + "loss": 0.5533, + "step": 17020 + }, + { + "epoch": 0.73, + "learning_rate": 6.380932022231723e-05, + "loss": 0.605, + "step": 17030 + }, + { + "epoch": 0.73, + "learning_rate": 6.378794356562634e-05, + "loss": 0.5545, + "step": 17040 + }, + { + "epoch": 0.73, + "learning_rate": 6.376656690893544e-05, + "loss": 0.5409, + "step": 17050 + }, + { + "epoch": 0.73, + "learning_rate": 6.374519025224455e-05, + "loss": 0.6025, + "step": 17060 + }, + { + "epoch": 0.73, + "learning_rate": 6.372381359555367e-05, + "loss": 0.539, + "step": 17070 + }, + { + "epoch": 0.73, + "learning_rate": 6.370243693886276e-05, + "loss": 0.5994, + "step": 17080 + }, + { + "epoch": 0.73, + "learning_rate": 6.368106028217186e-05, + "loss": 0.5615, + "step": 17090 + }, + { + "epoch": 0.73, + "learning_rate": 6.365968362548098e-05, + "loss": 0.5353, + "step": 17100 + }, + { + "epoch": 0.73, + "learning_rate": 6.363830696879008e-05, + "loss": 0.621, + "step": 17110 + }, + { + "epoch": 0.73, + "learning_rate": 6.361693031209919e-05, + "loss": 0.5477, + "step": 17120 + }, + { + "epoch": 0.73, + "learning_rate": 6.359555365540831e-05, + "loss": 0.6055, + "step": 17130 + }, + { + "epoch": 0.73, + "learning_rate": 6.357417699871741e-05, + "loss": 0.5412, + "step": 17140 + }, + { + "epoch": 0.73, + "learning_rate": 6.35528003420265e-05, + "loss": 0.5579, + "step": 17150 + }, + { + "epoch": 0.73, + "learning_rate": 6.353142368533561e-05, + "loss": 0.6094, + "step": 17160 + }, + { + "epoch": 0.73, + "learning_rate": 6.351004702864473e-05, + "loss": 0.5529, + "step": 17170 + }, + { + "epoch": 0.73, + "learning_rate": 6.348867037195383e-05, + "loss": 0.5846, + "step": 17180 + }, + { + "epoch": 0.73, + "learning_rate": 6.346729371526293e-05, + "loss": 0.5621, + "step": 17190 + }, + { + "epoch": 0.73, + "learning_rate": 6.344591705857205e-05, + "loss": 0.5463, + "step": 17200 + }, + { + "epoch": 0.73, + "learning_rate": 6.342454040188114e-05, + "loss": 0.61, + "step": 17210 + }, + { + "epoch": 0.73, + "learning_rate": 6.340316374519025e-05, + "loss": 0.5293, + "step": 17220 + }, + { + "epoch": 0.74, + "learning_rate": 6.338178708849937e-05, + "loss": 0.5979, + "step": 17230 + }, + { + "epoch": 0.74, + "learning_rate": 6.336041043180847e-05, + "loss": 0.5604, + "step": 17240 + }, + { + "epoch": 0.74, + "learning_rate": 6.333903377511757e-05, + "loss": 0.5422, + "step": 17250 + }, + { + "epoch": 0.74, + "learning_rate": 6.331765711842668e-05, + "loss": 0.604, + "step": 17260 + }, + { + "epoch": 0.74, + "learning_rate": 6.32962804617358e-05, + "loss": 0.5511, + "step": 17270 + }, + { + "epoch": 0.74, + "learning_rate": 6.327490380504489e-05, + "loss": 0.598, + "step": 17280 + }, + { + "epoch": 0.74, + "learning_rate": 6.325352714835399e-05, + "loss": 0.5573, + "step": 17290 + }, + { + "epoch": 0.74, + "learning_rate": 6.323215049166311e-05, + "loss": 0.5378, + "step": 17300 + }, + { + "epoch": 0.74, + "learning_rate": 6.321077383497222e-05, + "loss": 0.6017, + "step": 17310 + }, + { + "epoch": 0.74, + "learning_rate": 6.318939717828132e-05, + "loss": 0.5463, + "step": 17320 + }, + { + "epoch": 0.74, + "learning_rate": 6.316802052159042e-05, + "loss": 0.5951, + "step": 17330 + }, + { + "epoch": 0.74, + "learning_rate": 6.314664386489953e-05, + "loss": 0.5614, + "step": 17340 + }, + { + "epoch": 0.74, + "learning_rate": 6.312526720820863e-05, + "loss": 0.5425, + "step": 17350 + }, + { + "epoch": 0.74, + "learning_rate": 6.310389055151774e-05, + "loss": 0.6129, + "step": 17360 + }, + { + "epoch": 0.74, + "learning_rate": 6.308251389482686e-05, + "loss": 0.5388, + "step": 17370 + }, + { + "epoch": 0.74, + "learning_rate": 6.306113723813596e-05, + "loss": 0.5997, + "step": 17380 + }, + { + "epoch": 0.74, + "learning_rate": 6.303976058144506e-05, + "loss": 0.5555, + "step": 17390 + }, + { + "epoch": 0.74, + "learning_rate": 6.301838392475418e-05, + "loss": 0.5479, + "step": 17400 + }, + { + "epoch": 0.74, + "learning_rate": 6.299700726806327e-05, + "loss": 0.6108, + "step": 17410 + }, + { + "epoch": 0.74, + "learning_rate": 6.297563061137238e-05, + "loss": 0.5383, + "step": 17420 + }, + { + "epoch": 0.74, + "learning_rate": 6.295425395468148e-05, + "loss": 0.5983, + "step": 17430 + }, + { + "epoch": 0.74, + "learning_rate": 6.29328772979906e-05, + "loss": 0.5641, + "step": 17440 + }, + { + "epoch": 0.74, + "learning_rate": 6.29115006412997e-05, + "loss": 0.5459, + "step": 17450 + }, + { + "epoch": 0.74, + "learning_rate": 6.289012398460881e-05, + "loss": 0.6169, + "step": 17460 + }, + { + "epoch": 0.75, + "learning_rate": 6.286874732791791e-05, + "loss": 0.5524, + "step": 17470 + }, + { + "epoch": 0.75, + "learning_rate": 6.284737067122702e-05, + "loss": 0.6054, + "step": 17480 + }, + { + "epoch": 0.75, + "learning_rate": 6.282599401453612e-05, + "loss": 0.5504, + "step": 17490 + }, + { + "epoch": 0.75, + "learning_rate": 6.280461735784524e-05, + "loss": 0.5439, + "step": 17500 + }, + { + "epoch": 0.75, + "learning_rate": 6.278324070115435e-05, + "loss": 0.6144, + "step": 17510 + }, + { + "epoch": 0.75, + "learning_rate": 6.276186404446345e-05, + "loss": 0.5403, + "step": 17520 + }, + { + "epoch": 0.75, + "learning_rate": 6.274048738777256e-05, + "loss": 0.5901, + "step": 17530 + }, + { + "epoch": 0.75, + "learning_rate": 6.271911073108166e-05, + "loss": 0.5567, + "step": 17540 + }, + { + "epoch": 0.75, + "learning_rate": 6.269773407439076e-05, + "loss": 0.5438, + "step": 17550 + }, + { + "epoch": 0.75, + "learning_rate": 6.267635741769987e-05, + "loss": 0.6143, + "step": 17560 + }, + { + "epoch": 0.75, + "learning_rate": 6.265498076100899e-05, + "loss": 0.5406, + "step": 17570 + }, + { + "epoch": 0.75, + "learning_rate": 6.263360410431809e-05, + "loss": 0.6106, + "step": 17580 + }, + { + "epoch": 0.75, + "learning_rate": 6.26122274476272e-05, + "loss": 0.5512, + "step": 17590 + }, + { + "epoch": 0.75, + "learning_rate": 6.25908507909363e-05, + "loss": 0.5435, + "step": 17600 + }, + { + "epoch": 0.75, + "learning_rate": 6.25694741342454e-05, + "loss": 0.6053, + "step": 17610 + }, + { + "epoch": 0.75, + "learning_rate": 6.254809747755451e-05, + "loss": 0.5473, + "step": 17620 + }, + { + "epoch": 0.75, + "learning_rate": 6.252672082086361e-05, + "loss": 0.6014, + "step": 17630 + }, + { + "epoch": 0.75, + "learning_rate": 6.250534416417273e-05, + "loss": 0.5564, + "step": 17640 + }, + { + "epoch": 0.75, + "learning_rate": 6.248396750748184e-05, + "loss": 0.5487, + "step": 17650 + }, + { + "epoch": 0.75, + "learning_rate": 6.246259085079094e-05, + "loss": 0.6089, + "step": 17660 + }, + { + "epoch": 0.75, + "learning_rate": 6.244121419410005e-05, + "loss": 0.5461, + "step": 17670 + }, + { + "epoch": 0.75, + "learning_rate": 6.241983753740915e-05, + "loss": 0.5984, + "step": 17680 + }, + { + "epoch": 0.75, + "learning_rate": 6.239846088071825e-05, + "loss": 0.5527, + "step": 17690 + }, + { + "epoch": 0.76, + "learning_rate": 6.237708422402736e-05, + "loss": 0.5457, + "step": 17700 + }, + { + "epoch": 0.76, + "learning_rate": 6.235570756733648e-05, + "loss": 0.6033, + "step": 17710 + }, + { + "epoch": 0.76, + "learning_rate": 6.233433091064558e-05, + "loss": 0.5414, + "step": 17720 + }, + { + "epoch": 0.76, + "learning_rate": 6.231295425395469e-05, + "loss": 0.5885, + "step": 17730 + }, + { + "epoch": 0.76, + "learning_rate": 6.229157759726379e-05, + "loss": 0.5565, + "step": 17740 + }, + { + "epoch": 0.76, + "learning_rate": 6.22702009405729e-05, + "loss": 0.5529, + "step": 17750 + }, + { + "epoch": 0.76, + "learning_rate": 6.2248824283882e-05, + "loss": 0.6132, + "step": 17760 + }, + { + "epoch": 0.76, + "learning_rate": 6.222744762719112e-05, + "loss": 0.5507, + "step": 17770 + }, + { + "epoch": 0.76, + "learning_rate": 6.220607097050022e-05, + "loss": 0.5943, + "step": 17780 + }, + { + "epoch": 0.76, + "learning_rate": 6.218469431380933e-05, + "loss": 0.5488, + "step": 17790 + }, + { + "epoch": 0.76, + "learning_rate": 6.216331765711843e-05, + "loss": 0.5418, + "step": 17800 + }, + { + "epoch": 0.76, + "learning_rate": 6.214194100042754e-05, + "loss": 0.6067, + "step": 17810 + }, + { + "epoch": 0.76, + "learning_rate": 6.212056434373664e-05, + "loss": 0.5479, + "step": 17820 + }, + { + "epoch": 0.76, + "learning_rate": 6.209918768704574e-05, + "loss": 0.594, + "step": 17830 + }, + { + "epoch": 0.76, + "learning_rate": 6.207781103035486e-05, + "loss": 0.5497, + "step": 17840 + }, + { + "epoch": 0.76, + "learning_rate": 6.205643437366397e-05, + "loss": 0.5378, + "step": 17850 + }, + { + "epoch": 0.76, + "learning_rate": 6.203505771697307e-05, + "loss": 0.6112, + "step": 17860 + }, + { + "epoch": 0.76, + "learning_rate": 6.201368106028216e-05, + "loss": 0.5372, + "step": 17870 + }, + { + "epoch": 0.76, + "learning_rate": 6.199230440359128e-05, + "loss": 0.6025, + "step": 17880 + }, + { + "epoch": 0.76, + "learning_rate": 6.197092774690039e-05, + "loss": 0.5464, + "step": 17890 + }, + { + "epoch": 0.76, + "learning_rate": 6.194955109020949e-05, + "loss": 0.5466, + "step": 17900 + }, + { + "epoch": 0.76, + "learning_rate": 6.192817443351861e-05, + "loss": 0.6114, + "step": 17910 + }, + { + "epoch": 0.76, + "learning_rate": 6.190679777682771e-05, + "loss": 0.5411, + "step": 17920 + }, + { + "epoch": 0.76, + "learning_rate": 6.188542112013682e-05, + "loss": 0.5994, + "step": 17930 + }, + { + "epoch": 0.77, + "learning_rate": 6.186404446344592e-05, + "loss": 0.5553, + "step": 17940 + }, + { + "epoch": 0.77, + "learning_rate": 6.184266780675503e-05, + "loss": 0.5492, + "step": 17950 + }, + { + "epoch": 0.77, + "learning_rate": 6.182129115006413e-05, + "loss": 0.6096, + "step": 17960 + }, + { + "epoch": 0.77, + "learning_rate": 6.179991449337324e-05, + "loss": 0.5391, + "step": 17970 + }, + { + "epoch": 0.77, + "learning_rate": 6.177853783668235e-05, + "loss": 0.6039, + "step": 17980 + }, + { + "epoch": 0.77, + "learning_rate": 6.175716117999146e-05, + "loss": 0.5528, + "step": 17990 + }, + { + "epoch": 0.77, + "learning_rate": 6.173578452330055e-05, + "loss": 0.5364, + "step": 18000 + }, + { + "epoch": 0.77, + "learning_rate": 6.171440786660967e-05, + "loss": 0.6045, + "step": 18010 + }, + { + "epoch": 0.77, + "learning_rate": 6.169303120991877e-05, + "loss": 0.5494, + "step": 18020 + }, + { + "epoch": 0.77, + "learning_rate": 6.167165455322788e-05, + "loss": 0.5933, + "step": 18030 + }, + { + "epoch": 0.77, + "learning_rate": 6.1650277896537e-05, + "loss": 0.5572, + "step": 18040 + }, + { + "epoch": 0.77, + "learning_rate": 6.16289012398461e-05, + "loss": 0.5318, + "step": 18050 + }, + { + "epoch": 0.77, + "learning_rate": 6.16075245831552e-05, + "loss": 0.6114, + "step": 18060 + }, + { + "epoch": 0.77, + "learning_rate": 6.15861479264643e-05, + "loss": 0.5443, + "step": 18070 + }, + { + "epoch": 0.77, + "learning_rate": 6.156477126977341e-05, + "loss": 0.5965, + "step": 18080 + }, + { + "epoch": 0.77, + "learning_rate": 6.154339461308252e-05, + "loss": 0.5559, + "step": 18090 + }, + { + "epoch": 0.77, + "learning_rate": 6.152201795639162e-05, + "loss": 0.5402, + "step": 18100 + }, + { + "epoch": 0.77, + "learning_rate": 6.150064129970074e-05, + "loss": 0.5973, + "step": 18110 + }, + { + "epoch": 0.77, + "learning_rate": 6.147926464300984e-05, + "loss": 0.5467, + "step": 18120 + }, + { + "epoch": 0.77, + "learning_rate": 6.145788798631893e-05, + "loss": 0.6031, + "step": 18130 + }, + { + "epoch": 0.77, + "learning_rate": 6.143651132962804e-05, + "loss": 0.5514, + "step": 18140 + }, + { + "epoch": 0.77, + "learning_rate": 6.141513467293716e-05, + "loss": 0.5423, + "step": 18150 + }, + { + "epoch": 0.77, + "learning_rate": 6.139375801624626e-05, + "loss": 0.6089, + "step": 18160 + }, + { + "epoch": 0.78, + "learning_rate": 6.137238135955537e-05, + "loss": 0.5418, + "step": 18170 + }, + { + "epoch": 0.78, + "learning_rate": 6.135100470286448e-05, + "loss": 0.5969, + "step": 18180 + }, + { + "epoch": 0.78, + "learning_rate": 6.132962804617359e-05, + "loss": 0.5611, + "step": 18190 + }, + { + "epoch": 0.78, + "learning_rate": 6.130825138948268e-05, + "loss": 0.5402, + "step": 18200 + }, + { + "epoch": 0.78, + "learning_rate": 6.12868747327918e-05, + "loss": 0.5977, + "step": 18210 + }, + { + "epoch": 0.78, + "learning_rate": 6.12654980761009e-05, + "loss": 0.5402, + "step": 18220 + }, + { + "epoch": 0.78, + "learning_rate": 6.124412141941e-05, + "loss": 0.6007, + "step": 18230 + }, + { + "epoch": 0.78, + "learning_rate": 6.122274476271911e-05, + "loss": 0.553, + "step": 18240 + }, + { + "epoch": 0.78, + "learning_rate": 6.120136810602823e-05, + "loss": 0.5448, + "step": 18250 + }, + { + "epoch": 0.78, + "learning_rate": 6.117999144933732e-05, + "loss": 0.6071, + "step": 18260 + }, + { + "epoch": 0.78, + "learning_rate": 6.115861479264642e-05, + "loss": 0.5442, + "step": 18270 + }, + { + "epoch": 0.78, + "learning_rate": 6.113723813595554e-05, + "loss": 0.601, + "step": 18280 + }, + { + "epoch": 0.78, + "learning_rate": 6.111586147926465e-05, + "loss": 0.5626, + "step": 18290 + }, + { + "epoch": 0.78, + "learning_rate": 6.109448482257375e-05, + "loss": 0.5438, + "step": 18300 + }, + { + "epoch": 0.78, + "learning_rate": 6.107310816588287e-05, + "loss": 0.6059, + "step": 18310 + }, + { + "epoch": 0.78, + "learning_rate": 6.105173150919196e-05, + "loss": 0.5412, + "step": 18320 + }, + { + "epoch": 0.78, + "learning_rate": 6.103035485250107e-05, + "loss": 0.598, + "step": 18330 + }, + { + "epoch": 0.78, + "learning_rate": 6.1008978195810176e-05, + "loss": 0.5517, + "step": 18340 + }, + { + "epoch": 0.78, + "learning_rate": 6.098760153911929e-05, + "loss": 0.5358, + "step": 18350 + }, + { + "epoch": 0.78, + "learning_rate": 6.096622488242839e-05, + "loss": 0.6126, + "step": 18360 + }, + { + "epoch": 0.78, + "learning_rate": 6.09448482257375e-05, + "loss": 0.5423, + "step": 18370 + }, + { + "epoch": 0.78, + "learning_rate": 6.092347156904661e-05, + "loss": 0.5889, + "step": 18380 + }, + { + "epoch": 0.78, + "learning_rate": 6.090209491235571e-05, + "loss": 0.5559, + "step": 18390 + }, + { + "epoch": 0.78, + "learning_rate": 6.088071825566482e-05, + "loss": 0.5475, + "step": 18400 + }, + { + "epoch": 0.79, + "learning_rate": 6.0859341598973915e-05, + "loss": 0.5988, + "step": 18410 + }, + { + "epoch": 0.79, + "learning_rate": 6.083796494228303e-05, + "loss": 0.5447, + "step": 18420 + }, + { + "epoch": 0.79, + "learning_rate": 6.081658828559214e-05, + "loss": 0.5964, + "step": 18430 + }, + { + "epoch": 0.79, + "learning_rate": 6.079521162890124e-05, + "loss": 0.5417, + "step": 18440 + }, + { + "epoch": 0.79, + "learning_rate": 6.077383497221035e-05, + "loss": 0.5367, + "step": 18450 + }, + { + "epoch": 0.79, + "learning_rate": 6.075245831551946e-05, + "loss": 0.6078, + "step": 18460 + }, + { + "epoch": 0.79, + "learning_rate": 6.073108165882856e-05, + "loss": 0.536, + "step": 18470 + }, + { + "epoch": 0.79, + "learning_rate": 6.0709705002137673e-05, + "loss": 0.5941, + "step": 18480 + }, + { + "epoch": 0.79, + "learning_rate": 6.068832834544678e-05, + "loss": 0.558, + "step": 18490 + }, + { + "epoch": 0.79, + "learning_rate": 6.066695168875588e-05, + "loss": 0.5506, + "step": 18500 + }, + { + "epoch": 0.79, + "learning_rate": 6.064557503206498e-05, + "loss": 0.612, + "step": 18510 + }, + { + "epoch": 0.79, + "learning_rate": 6.06241983753741e-05, + "loss": 0.5463, + "step": 18520 + }, + { + "epoch": 0.79, + "learning_rate": 6.06028217186832e-05, + "loss": 0.6018, + "step": 18530 + }, + { + "epoch": 0.79, + "learning_rate": 6.05814450619923e-05, + "loss": 0.5613, + "step": 18540 + }, + { + "epoch": 0.79, + "learning_rate": 6.056006840530142e-05, + "loss": 0.5428, + "step": 18550 + }, + { + "epoch": 0.79, + "learning_rate": 6.053869174861052e-05, + "loss": 0.6173, + "step": 18560 + }, + { + "epoch": 0.79, + "learning_rate": 6.051731509191962e-05, + "loss": 0.5419, + "step": 18570 + }, + { + "epoch": 0.79, + "learning_rate": 6.049593843522874e-05, + "loss": 0.5883, + "step": 18580 + }, + { + "epoch": 0.79, + "learning_rate": 6.0474561778537843e-05, + "loss": 0.5496, + "step": 18590 + }, + { + "epoch": 0.79, + "learning_rate": 6.045318512184695e-05, + "loss": 0.5453, + "step": 18600 + }, + { + "epoch": 0.79, + "learning_rate": 6.0431808465156046e-05, + "loss": 0.6091, + "step": 18610 + }, + { + "epoch": 0.79, + "learning_rate": 6.0410431808465164e-05, + "loss": 0.5405, + "step": 18620 + }, + { + "epoch": 0.79, + "learning_rate": 6.038905515177427e-05, + "loss": 0.6, + "step": 18630 + }, + { + "epoch": 0.8, + "learning_rate": 6.0367678495083366e-05, + "loss": 0.5491, + "step": 18640 + }, + { + "epoch": 0.8, + "learning_rate": 6.0346301838392484e-05, + "loss": 0.5428, + "step": 18650 + }, + { + "epoch": 0.8, + "learning_rate": 6.032492518170159e-05, + "loss": 0.6001, + "step": 18660 + }, + { + "epoch": 0.8, + "learning_rate": 6.0303548525010686e-05, + "loss": 0.554, + "step": 18670 + }, + { + "epoch": 0.8, + "learning_rate": 6.028217186831979e-05, + "loss": 0.5888, + "step": 18680 + }, + { + "epoch": 0.8, + "learning_rate": 6.026079521162891e-05, + "loss": 0.551, + "step": 18690 + }, + { + "epoch": 0.8, + "learning_rate": 6.0239418554938007e-05, + "loss": 0.5353, + "step": 18700 + }, + { + "epoch": 0.8, + "learning_rate": 6.021804189824711e-05, + "loss": 0.6005, + "step": 18710 + }, + { + "epoch": 0.8, + "learning_rate": 6.019666524155623e-05, + "loss": 0.5479, + "step": 18720 + }, + { + "epoch": 0.8, + "learning_rate": 6.017528858486533e-05, + "loss": 0.5869, + "step": 18730 + }, + { + "epoch": 0.8, + "learning_rate": 6.015391192817443e-05, + "loss": 0.5578, + "step": 18740 + }, + { + "epoch": 0.8, + "learning_rate": 6.013253527148355e-05, + "loss": 0.5463, + "step": 18750 + }, + { + "epoch": 0.8, + "learning_rate": 6.0111158614792654e-05, + "loss": 0.6061, + "step": 18760 + }, + { + "epoch": 0.8, + "learning_rate": 6.008978195810175e-05, + "loss": 0.542, + "step": 18770 + }, + { + "epoch": 0.8, + "learning_rate": 6.0068405301410856e-05, + "loss": 0.6112, + "step": 18780 + }, + { + "epoch": 0.8, + "learning_rate": 6.0047028644719974e-05, + "loss": 0.5618, + "step": 18790 + }, + { + "epoch": 0.8, + "learning_rate": 6.002565198802907e-05, + "loss": 0.5326, + "step": 18800 + }, + { + "epoch": 0.8, + "learning_rate": 6.0004275331338177e-05, + "loss": 0.6123, + "step": 18810 + }, + { + "epoch": 0.8, + "learning_rate": 5.9982898674647295e-05, + "loss": 0.5456, + "step": 18820 + }, + { + "epoch": 0.8, + "learning_rate": 5.996152201795639e-05, + "loss": 0.594, + "step": 18830 + }, + { + "epoch": 0.8, + "learning_rate": 5.99401453612655e-05, + "loss": 0.5503, + "step": 18840 + }, + { + "epoch": 0.8, + "learning_rate": 5.9918768704574615e-05, + "loss": 0.5468, + "step": 18850 + }, + { + "epoch": 0.8, + "learning_rate": 5.989739204788371e-05, + "loss": 0.6077, + "step": 18860 + }, + { + "epoch": 0.81, + "learning_rate": 5.987601539119282e-05, + "loss": 0.5418, + "step": 18870 + }, + { + "epoch": 0.81, + "learning_rate": 5.985463873450192e-05, + "loss": 0.5957, + "step": 18880 + }, + { + "epoch": 0.81, + "learning_rate": 5.983326207781104e-05, + "loss": 0.5498, + "step": 18890 + }, + { + "epoch": 0.81, + "learning_rate": 5.981188542112014e-05, + "loss": 0.5389, + "step": 18900 + }, + { + "epoch": 0.81, + "learning_rate": 5.979050876442924e-05, + "loss": 0.6052, + "step": 18910 + }, + { + "epoch": 0.81, + "learning_rate": 5.976913210773836e-05, + "loss": 0.5395, + "step": 18920 + }, + { + "epoch": 0.81, + "learning_rate": 5.974775545104746e-05, + "loss": 0.5939, + "step": 18930 + }, + { + "epoch": 0.81, + "learning_rate": 5.972637879435656e-05, + "loss": 0.554, + "step": 18940 + }, + { + "epoch": 0.81, + "learning_rate": 5.970500213766567e-05, + "loss": 0.5297, + "step": 18950 + }, + { + "epoch": 0.81, + "learning_rate": 5.968362548097478e-05, + "loss": 0.5997, + "step": 18960 + }, + { + "epoch": 0.81, + "learning_rate": 5.966224882428388e-05, + "loss": 0.5491, + "step": 18970 + }, + { + "epoch": 0.81, + "learning_rate": 5.964087216759299e-05, + "loss": 0.6009, + "step": 18980 + }, + { + "epoch": 0.81, + "learning_rate": 5.96194955109021e-05, + "loss": 0.5508, + "step": 18990 + }, + { + "epoch": 0.81, + "learning_rate": 5.95981188542112e-05, + "loss": 0.5384, + "step": 19000 + }, + { + "epoch": 0.81, + "learning_rate": 5.957674219752031e-05, + "loss": 0.6029, + "step": 19010 + }, + { + "epoch": 0.81, + "learning_rate": 5.955536554082942e-05, + "loss": 0.5443, + "step": 19020 + }, + { + "epoch": 0.81, + "learning_rate": 5.953398888413852e-05, + "loss": 0.5974, + "step": 19030 + }, + { + "epoch": 0.81, + "learning_rate": 5.951261222744763e-05, + "loss": 0.5557, + "step": 19040 + }, + { + "epoch": 0.81, + "learning_rate": 5.949123557075673e-05, + "loss": 0.5353, + "step": 19050 + }, + { + "epoch": 0.81, + "learning_rate": 5.9469858914065844e-05, + "loss": 0.5967, + "step": 19060 + }, + { + "epoch": 0.81, + "learning_rate": 5.944848225737495e-05, + "loss": 0.5462, + "step": 19070 + }, + { + "epoch": 0.81, + "learning_rate": 5.942710560068405e-05, + "loss": 0.5933, + "step": 19080 + }, + { + "epoch": 0.81, + "learning_rate": 5.9405728943993164e-05, + "loss": 0.55, + "step": 19090 + }, + { + "epoch": 0.81, + "learning_rate": 5.938435228730227e-05, + "loss": 0.5356, + "step": 19100 + }, + { + "epoch": 0.82, + "learning_rate": 5.936297563061137e-05, + "loss": 0.6037, + "step": 19110 + }, + { + "epoch": 0.82, + "learning_rate": 5.9341598973920484e-05, + "loss": 0.5441, + "step": 19120 + }, + { + "epoch": 0.82, + "learning_rate": 5.932022231722959e-05, + "loss": 0.6127, + "step": 19130 + }, + { + "epoch": 0.82, + "learning_rate": 5.929884566053869e-05, + "loss": 0.5548, + "step": 19140 + }, + { + "epoch": 0.82, + "learning_rate": 5.92774690038478e-05, + "loss": 0.5314, + "step": 19150 + }, + { + "epoch": 0.82, + "learning_rate": 5.925609234715691e-05, + "loss": 0.5982, + "step": 19160 + }, + { + "epoch": 0.82, + "learning_rate": 5.9234715690466013e-05, + "loss": 0.5398, + "step": 19170 + }, + { + "epoch": 0.82, + "learning_rate": 5.921333903377512e-05, + "loss": 0.592, + "step": 19180 + }, + { + "epoch": 0.82, + "learning_rate": 5.919196237708423e-05, + "loss": 0.5492, + "step": 19190 + }, + { + "epoch": 0.82, + "learning_rate": 5.9170585720393334e-05, + "loss": 0.538, + "step": 19200 + }, + { + "epoch": 0.82, + "learning_rate": 5.914920906370244e-05, + "loss": 0.6043, + "step": 19210 + }, + { + "epoch": 0.82, + "learning_rate": 5.912783240701154e-05, + "loss": 0.5316, + "step": 19220 + }, + { + "epoch": 0.82, + "learning_rate": 5.9106455750320654e-05, + "loss": 0.5946, + "step": 19230 + }, + { + "epoch": 0.82, + "learning_rate": 5.908507909362976e-05, + "loss": 0.5544, + "step": 19240 + }, + { + "epoch": 0.82, + "learning_rate": 5.906370243693886e-05, + "loss": 0.546, + "step": 19250 + }, + { + "epoch": 0.82, + "learning_rate": 5.9042325780247974e-05, + "loss": 0.6037, + "step": 19260 + }, + { + "epoch": 0.82, + "learning_rate": 5.902094912355708e-05, + "loss": 0.5547, + "step": 19270 + }, + { + "epoch": 0.82, + "learning_rate": 5.8999572466866183e-05, + "loss": 0.6003, + "step": 19280 + }, + { + "epoch": 0.82, + "learning_rate": 5.8978195810175295e-05, + "loss": 0.5537, + "step": 19290 + }, + { + "epoch": 0.82, + "learning_rate": 5.89568191534844e-05, + "loss": 0.528, + "step": 19300 + }, + { + "epoch": 0.82, + "learning_rate": 5.8935442496793504e-05, + "loss": 0.6007, + "step": 19310 + }, + { + "epoch": 0.82, + "learning_rate": 5.891406584010261e-05, + "loss": 0.5431, + "step": 19320 + }, + { + "epoch": 0.82, + "learning_rate": 5.889268918341172e-05, + "loss": 0.5949, + "step": 19330 + }, + { + "epoch": 0.83, + "learning_rate": 5.8871312526720824e-05, + "loss": 0.5474, + "step": 19340 + }, + { + "epoch": 0.83, + "learning_rate": 5.884993587002993e-05, + "loss": 0.5324, + "step": 19350 + }, + { + "epoch": 0.83, + "learning_rate": 5.882855921333904e-05, + "loss": 0.6074, + "step": 19360 + }, + { + "epoch": 0.83, + "learning_rate": 5.8807182556648144e-05, + "loss": 0.5434, + "step": 19370 + }, + { + "epoch": 0.83, + "learning_rate": 5.878580589995725e-05, + "loss": 0.5858, + "step": 19380 + }, + { + "epoch": 0.83, + "learning_rate": 5.876442924326636e-05, + "loss": 0.5517, + "step": 19390 + }, + { + "epoch": 0.83, + "learning_rate": 5.8743052586575465e-05, + "loss": 0.5515, + "step": 19400 + }, + { + "epoch": 0.83, + "learning_rate": 5.872167592988457e-05, + "loss": 0.6073, + "step": 19410 + }, + { + "epoch": 0.83, + "learning_rate": 5.8700299273193674e-05, + "loss": 0.5404, + "step": 19420 + }, + { + "epoch": 0.83, + "learning_rate": 5.8678922616502785e-05, + "loss": 0.5955, + "step": 19430 + }, + { + "epoch": 0.83, + "learning_rate": 5.865754595981189e-05, + "loss": 0.5563, + "step": 19440 + }, + { + "epoch": 0.83, + "learning_rate": 5.8636169303120994e-05, + "loss": 0.5411, + "step": 19450 + }, + { + "epoch": 0.83, + "learning_rate": 5.8614792646430105e-05, + "loss": 0.605, + "step": 19460 + }, + { + "epoch": 0.83, + "learning_rate": 5.859341598973921e-05, + "loss": 0.5447, + "step": 19470 + }, + { + "epoch": 0.83, + "learning_rate": 5.8572039333048314e-05, + "loss": 0.5972, + "step": 19480 + }, + { + "epoch": 0.83, + "learning_rate": 5.855066267635741e-05, + "loss": 0.5486, + "step": 19490 + }, + { + "epoch": 0.83, + "learning_rate": 5.852928601966653e-05, + "loss": 0.5396, + "step": 19500 + }, + { + "epoch": 0.83, + "learning_rate": 5.8507909362975635e-05, + "loss": 0.5956, + "step": 19510 + }, + { + "epoch": 0.83, + "learning_rate": 5.848653270628473e-05, + "loss": 0.5347, + "step": 19520 + }, + { + "epoch": 0.83, + "learning_rate": 5.846515604959385e-05, + "loss": 0.6002, + "step": 19530 + }, + { + "epoch": 0.83, + "learning_rate": 5.8443779392902955e-05, + "loss": 0.5509, + "step": 19540 + }, + { + "epoch": 0.83, + "learning_rate": 5.842240273621206e-05, + "loss": 0.5313, + "step": 19550 + }, + { + "epoch": 0.83, + "learning_rate": 5.840102607952117e-05, + "loss": 0.6076, + "step": 19560 + }, + { + "epoch": 0.83, + "learning_rate": 5.8379649422830275e-05, + "loss": 0.5359, + "step": 19570 + }, + { + "epoch": 0.84, + "learning_rate": 5.835827276613938e-05, + "loss": 0.5892, + "step": 19580 + }, + { + "epoch": 0.84, + "learning_rate": 5.833689610944848e-05, + "loss": 0.5579, + "step": 19590 + }, + { + "epoch": 0.84, + "learning_rate": 5.8315519452757596e-05, + "loss": 0.539, + "step": 19600 + }, + { + "epoch": 0.84, + "learning_rate": 5.82941427960667e-05, + "loss": 0.6003, + "step": 19610 + }, + { + "epoch": 0.84, + "learning_rate": 5.82727661393758e-05, + "loss": 0.5345, + "step": 19620 + }, + { + "epoch": 0.84, + "learning_rate": 5.8251389482684916e-05, + "loss": 0.5962, + "step": 19630 + }, + { + "epoch": 0.84, + "learning_rate": 5.823001282599402e-05, + "loss": 0.5534, + "step": 19640 + }, + { + "epoch": 0.84, + "learning_rate": 5.820863616930312e-05, + "loss": 0.5389, + "step": 19650 + }, + { + "epoch": 0.84, + "learning_rate": 5.8187259512612236e-05, + "loss": 0.6036, + "step": 19660 + }, + { + "epoch": 0.84, + "learning_rate": 5.816588285592134e-05, + "loss": 0.5484, + "step": 19670 + }, + { + "epoch": 0.84, + "learning_rate": 5.8144506199230445e-05, + "loss": 0.5932, + "step": 19680 + }, + { + "epoch": 0.84, + "learning_rate": 5.812312954253954e-05, + "loss": 0.5513, + "step": 19690 + }, + { + "epoch": 0.84, + "learning_rate": 5.810175288584866e-05, + "loss": 0.5332, + "step": 19700 + }, + { + "epoch": 0.84, + "learning_rate": 5.8080376229157765e-05, + "loss": 0.5999, + "step": 19710 + }, + { + "epoch": 0.84, + "learning_rate": 5.805899957246686e-05, + "loss": 0.5469, + "step": 19720 + }, + { + "epoch": 0.84, + "learning_rate": 5.803762291577598e-05, + "loss": 0.5805, + "step": 19730 + }, + { + "epoch": 0.84, + "learning_rate": 5.8016246259085086e-05, + "loss": 0.5482, + "step": 19740 + }, + { + "epoch": 0.84, + "learning_rate": 5.7994869602394184e-05, + "loss": 0.5346, + "step": 19750 + }, + { + "epoch": 0.84, + "learning_rate": 5.797349294570329e-05, + "loss": 0.6019, + "step": 19760 + }, + { + "epoch": 0.84, + "learning_rate": 5.7952116289012406e-05, + "loss": 0.5406, + "step": 19770 + }, + { + "epoch": 0.84, + "learning_rate": 5.7930739632321504e-05, + "loss": 0.5919, + "step": 19780 + }, + { + "epoch": 0.84, + "learning_rate": 5.790936297563061e-05, + "loss": 0.5551, + "step": 19790 + }, + { + "epoch": 0.84, + "learning_rate": 5.7887986318939726e-05, + "loss": 0.539, + "step": 19800 + }, + { + "epoch": 0.85, + "learning_rate": 5.7866609662248824e-05, + "loss": 0.6048, + "step": 19810 + }, + { + "epoch": 0.85, + "learning_rate": 5.784523300555793e-05, + "loss": 0.5319, + "step": 19820 + }, + { + "epoch": 0.85, + "learning_rate": 5.782385634886705e-05, + "loss": 0.5966, + "step": 19830 + }, + { + "epoch": 0.85, + "learning_rate": 5.780247969217615e-05, + "loss": 0.5552, + "step": 19840 + }, + { + "epoch": 0.85, + "learning_rate": 5.778110303548525e-05, + "loss": 0.5344, + "step": 19850 + }, + { + "epoch": 0.85, + "learning_rate": 5.7759726378794353e-05, + "loss": 0.6041, + "step": 19860 + }, + { + "epoch": 0.85, + "learning_rate": 5.773834972210347e-05, + "loss": 0.5436, + "step": 19870 + }, + { + "epoch": 0.85, + "learning_rate": 5.771697306541257e-05, + "loss": 0.591, + "step": 19880 + }, + { + "epoch": 0.85, + "learning_rate": 5.7695596408721674e-05, + "loss": 0.5445, + "step": 19890 + }, + { + "epoch": 0.85, + "learning_rate": 5.767421975203079e-05, + "loss": 0.5381, + "step": 19900 + }, + { + "epoch": 0.85, + "learning_rate": 5.765284309533989e-05, + "loss": 0.6059, + "step": 19910 + }, + { + "epoch": 0.85, + "learning_rate": 5.7631466438648994e-05, + "loss": 0.535, + "step": 19920 + }, + { + "epoch": 0.85, + "learning_rate": 5.761008978195811e-05, + "loss": 0.5897, + "step": 19930 + }, + { + "epoch": 0.85, + "learning_rate": 5.758871312526721e-05, + "loss": 0.5422, + "step": 19940 + }, + { + "epoch": 0.85, + "learning_rate": 5.7567336468576314e-05, + "loss": 0.537, + "step": 19950 + }, + { + "epoch": 0.85, + "learning_rate": 5.754595981188542e-05, + "loss": 0.5993, + "step": 19960 + }, + { + "epoch": 0.85, + "learning_rate": 5.752458315519453e-05, + "loss": 0.5404, + "step": 19970 + }, + { + "epoch": 0.85, + "learning_rate": 5.7503206498503635e-05, + "loss": 0.5844, + "step": 19980 + }, + { + "epoch": 0.85, + "learning_rate": 5.748182984181274e-05, + "loss": 0.5508, + "step": 19990 + }, + { + "epoch": 0.85, + "learning_rate": 5.746045318512186e-05, + "loss": 0.5198, + "step": 20000 + }, + { + "epoch": 0.85, + "learning_rate": 5.7439076528430955e-05, + "loss": 0.6004, + "step": 20010 + }, + { + "epoch": 0.85, + "learning_rate": 5.741769987174006e-05, + "loss": 0.5405, + "step": 20020 + }, + { + "epoch": 0.85, + "learning_rate": 5.7396323215049164e-05, + "loss": 0.5959, + "step": 20030 + }, + { + "epoch": 0.85, + "learning_rate": 5.7374946558358275e-05, + "loss": 0.5421, + "step": 20040 + }, + { + "epoch": 0.86, + "learning_rate": 5.735356990166738e-05, + "loss": 0.5265, + "step": 20050 + }, + { + "epoch": 0.86, + "learning_rate": 5.7332193244976484e-05, + "loss": 0.6, + "step": 20060 + }, + { + "epoch": 0.86, + "learning_rate": 5.7310816588285596e-05, + "loss": 0.5298, + "step": 20070 + }, + { + "epoch": 0.86, + "learning_rate": 5.72894399315947e-05, + "loss": 0.5941, + "step": 20080 + }, + { + "epoch": 0.86, + "learning_rate": 5.7268063274903805e-05, + "loss": 0.5567, + "step": 20090 + }, + { + "epoch": 0.86, + "learning_rate": 5.7246686618212916e-05, + "loss": 0.5233, + "step": 20100 + }, + { + "epoch": 0.86, + "learning_rate": 5.722530996152202e-05, + "loss": 0.6093, + "step": 20110 + }, + { + "epoch": 0.86, + "learning_rate": 5.7203933304831125e-05, + "loss": 0.5326, + "step": 20120 + }, + { + "epoch": 0.86, + "learning_rate": 5.718255664814023e-05, + "loss": 0.6004, + "step": 20130 + }, + { + "epoch": 0.86, + "learning_rate": 5.716117999144934e-05, + "loss": 0.5669, + "step": 20140 + }, + { + "epoch": 0.86, + "learning_rate": 5.7139803334758445e-05, + "loss": 0.5367, + "step": 20150 + }, + { + "epoch": 0.86, + "learning_rate": 5.711842667806755e-05, + "loss": 0.596, + "step": 20160 + }, + { + "epoch": 0.86, + "learning_rate": 5.709705002137666e-05, + "loss": 0.5525, + "step": 20170 + }, + { + "epoch": 0.86, + "learning_rate": 5.7075673364685766e-05, + "loss": 0.5978, + "step": 20180 + }, + { + "epoch": 0.86, + "learning_rate": 5.705429670799487e-05, + "loss": 0.5462, + "step": 20190 + }, + { + "epoch": 0.86, + "learning_rate": 5.703292005130398e-05, + "loss": 0.54, + "step": 20200 + }, + { + "epoch": 0.86, + "learning_rate": 5.7011543394613086e-05, + "loss": 0.5983, + "step": 20210 + }, + { + "epoch": 0.86, + "learning_rate": 5.699016673792219e-05, + "loss": 0.5388, + "step": 20220 + }, + { + "epoch": 0.86, + "learning_rate": 5.6968790081231295e-05, + "loss": 0.597, + "step": 20230 + }, + { + "epoch": 0.86, + "learning_rate": 5.6947413424540406e-05, + "loss": 0.5617, + "step": 20240 + }, + { + "epoch": 0.86, + "learning_rate": 5.692603676784951e-05, + "loss": 0.5306, + "step": 20250 + }, + { + "epoch": 0.86, + "learning_rate": 5.6904660111158615e-05, + "loss": 0.601, + "step": 20260 + }, + { + "epoch": 0.86, + "learning_rate": 5.6883283454467727e-05, + "loss": 0.54, + "step": 20270 + }, + { + "epoch": 0.87, + "learning_rate": 5.686190679777683e-05, + "loss": 0.5905, + "step": 20280 + }, + { + "epoch": 0.87, + "learning_rate": 5.6840530141085936e-05, + "loss": 0.5599, + "step": 20290 + }, + { + "epoch": 0.87, + "learning_rate": 5.681915348439504e-05, + "loss": 0.5438, + "step": 20300 + }, + { + "epoch": 0.87, + "learning_rate": 5.679777682770415e-05, + "loss": 0.6022, + "step": 20310 + }, + { + "epoch": 0.87, + "learning_rate": 5.6776400171013256e-05, + "loss": 0.5244, + "step": 20320 + }, + { + "epoch": 0.87, + "learning_rate": 5.675502351432236e-05, + "loss": 0.5905, + "step": 20330 + }, + { + "epoch": 0.87, + "learning_rate": 5.673364685763147e-05, + "loss": 0.5503, + "step": 20340 + }, + { + "epoch": 0.87, + "learning_rate": 5.6712270200940576e-05, + "loss": 0.5322, + "step": 20350 + }, + { + "epoch": 0.87, + "learning_rate": 5.669089354424968e-05, + "loss": 0.6064, + "step": 20360 + }, + { + "epoch": 0.87, + "learning_rate": 5.666951688755879e-05, + "loss": 0.5453, + "step": 20370 + }, + { + "epoch": 0.87, + "learning_rate": 5.6648140230867896e-05, + "loss": 0.593, + "step": 20380 + }, + { + "epoch": 0.87, + "learning_rate": 5.6626763574177e-05, + "loss": 0.5486, + "step": 20390 + }, + { + "epoch": 0.87, + "learning_rate": 5.6605386917486105e-05, + "loss": 0.5426, + "step": 20400 + }, + { + "epoch": 0.87, + "learning_rate": 5.658401026079522e-05, + "loss": 0.6055, + "step": 20410 + }, + { + "epoch": 0.87, + "learning_rate": 5.656263360410432e-05, + "loss": 0.541, + "step": 20420 + }, + { + "epoch": 0.87, + "learning_rate": 5.6541256947413426e-05, + "loss": 0.5992, + "step": 20430 + }, + { + "epoch": 0.87, + "learning_rate": 5.651988029072254e-05, + "loss": 0.5508, + "step": 20440 + }, + { + "epoch": 0.87, + "learning_rate": 5.649850363403164e-05, + "loss": 0.5352, + "step": 20450 + }, + { + "epoch": 0.87, + "learning_rate": 5.6477126977340746e-05, + "loss": 0.5986, + "step": 20460 + }, + { + "epoch": 0.87, + "learning_rate": 5.645575032064986e-05, + "loss": 0.5446, + "step": 20470 + }, + { + "epoch": 0.87, + "learning_rate": 5.643437366395896e-05, + "loss": 0.5984, + "step": 20480 + }, + { + "epoch": 0.87, + "learning_rate": 5.6412997007268066e-05, + "loss": 0.5426, + "step": 20490 + }, + { + "epoch": 0.87, + "learning_rate": 5.639162035057717e-05, + "loss": 0.5391, + "step": 20500 + }, + { + "epoch": 0.87, + "learning_rate": 5.637024369388628e-05, + "loss": 0.5997, + "step": 20510 + }, + { + "epoch": 0.88, + "learning_rate": 5.634886703719539e-05, + "loss": 0.5297, + "step": 20520 + }, + { + "epoch": 0.88, + "learning_rate": 5.632749038050449e-05, + "loss": 0.5924, + "step": 20530 + }, + { + "epoch": 0.88, + "learning_rate": 5.63061137238136e-05, + "loss": 0.5502, + "step": 20540 + }, + { + "epoch": 0.88, + "learning_rate": 5.628473706712271e-05, + "loss": 0.5297, + "step": 20550 + }, + { + "epoch": 0.88, + "learning_rate": 5.626336041043181e-05, + "loss": 0.5915, + "step": 20560 + }, + { + "epoch": 0.88, + "learning_rate": 5.624198375374091e-05, + "loss": 0.5363, + "step": 20570 + }, + { + "epoch": 0.88, + "learning_rate": 5.622060709705003e-05, + "loss": 0.5982, + "step": 20580 + }, + { + "epoch": 0.88, + "learning_rate": 5.619923044035913e-05, + "loss": 0.5542, + "step": 20590 + }, + { + "epoch": 0.88, + "learning_rate": 5.617785378366823e-05, + "loss": 0.5359, + "step": 20600 + }, + { + "epoch": 0.88, + "learning_rate": 5.615647712697735e-05, + "loss": 0.5999, + "step": 20610 + }, + { + "epoch": 0.88, + "learning_rate": 5.613510047028645e-05, + "loss": 0.5455, + "step": 20620 + }, + { + "epoch": 0.88, + "learning_rate": 5.611372381359556e-05, + "loss": 0.6033, + "step": 20630 + }, + { + "epoch": 0.88, + "learning_rate": 5.609234715690467e-05, + "loss": 0.5505, + "step": 20640 + }, + { + "epoch": 0.88, + "learning_rate": 5.607097050021377e-05, + "loss": 0.5404, + "step": 20650 + }, + { + "epoch": 0.88, + "learning_rate": 5.604959384352288e-05, + "loss": 0.6059, + "step": 20660 + }, + { + "epoch": 0.88, + "learning_rate": 5.6028217186831975e-05, + "loss": 0.532, + "step": 20670 + }, + { + "epoch": 0.88, + "learning_rate": 5.600684053014109e-05, + "loss": 0.5928, + "step": 20680 + }, + { + "epoch": 0.88, + "learning_rate": 5.59854638734502e-05, + "loss": 0.5459, + "step": 20690 + }, + { + "epoch": 0.88, + "learning_rate": 5.5964087216759295e-05, + "loss": 0.5339, + "step": 20700 + }, + { + "epoch": 0.88, + "learning_rate": 5.594271056006841e-05, + "loss": 0.5905, + "step": 20710 + }, + { + "epoch": 0.88, + "learning_rate": 5.592133390337752e-05, + "loss": 0.5389, + "step": 20720 + }, + { + "epoch": 0.88, + "learning_rate": 5.5899957246686615e-05, + "loss": 0.5874, + "step": 20730 + }, + { + "epoch": 0.88, + "learning_rate": 5.587858058999573e-05, + "loss": 0.5474, + "step": 20740 + }, + { + "epoch": 0.89, + "learning_rate": 5.585720393330484e-05, + "loss": 0.5267, + "step": 20750 + }, + { + "epoch": 0.89, + "learning_rate": 5.5835827276613936e-05, + "loss": 0.606, + "step": 20760 + }, + { + "epoch": 0.89, + "learning_rate": 5.581445061992304e-05, + "loss": 0.5355, + "step": 20770 + }, + { + "epoch": 0.89, + "learning_rate": 5.579307396323216e-05, + "loss": 0.5976, + "step": 20780 + }, + { + "epoch": 0.89, + "learning_rate": 5.577169730654126e-05, + "loss": 0.5564, + "step": 20790 + }, + { + "epoch": 0.89, + "learning_rate": 5.575032064985036e-05, + "loss": 0.5323, + "step": 20800 + }, + { + "epoch": 0.89, + "learning_rate": 5.572894399315948e-05, + "loss": 0.6026, + "step": 20810 + }, + { + "epoch": 0.89, + "learning_rate": 5.570756733646858e-05, + "loss": 0.5389, + "step": 20820 + }, + { + "epoch": 0.89, + "learning_rate": 5.568619067977768e-05, + "loss": 0.5955, + "step": 20830 + }, + { + "epoch": 0.89, + "learning_rate": 5.5664814023086785e-05, + "loss": 0.546, + "step": 20840 + }, + { + "epoch": 0.89, + "learning_rate": 5.56434373663959e-05, + "loss": 0.5335, + "step": 20850 + }, + { + "epoch": 0.89, + "learning_rate": 5.5622060709705e-05, + "loss": 0.6122, + "step": 20860 + }, + { + "epoch": 0.89, + "learning_rate": 5.5600684053014106e-05, + "loss": 0.5249, + "step": 20870 + }, + { + "epoch": 0.89, + "learning_rate": 5.5579307396323224e-05, + "loss": 0.5895, + "step": 20880 + }, + { + "epoch": 0.89, + "learning_rate": 5.555793073963232e-05, + "loss": 0.5518, + "step": 20890 + }, + { + "epoch": 0.89, + "learning_rate": 5.5536554082941426e-05, + "loss": 0.5336, + "step": 20900 + }, + { + "epoch": 0.89, + "learning_rate": 5.5515177426250544e-05, + "loss": 0.5982, + "step": 20910 + }, + { + "epoch": 0.89, + "learning_rate": 5.549380076955965e-05, + "loss": 0.5328, + "step": 20920 + }, + { + "epoch": 0.89, + "learning_rate": 5.5472424112868746e-05, + "loss": 0.5856, + "step": 20930 + }, + { + "epoch": 0.89, + "learning_rate": 5.545104745617785e-05, + "loss": 0.5567, + "step": 20940 + }, + { + "epoch": 0.89, + "learning_rate": 5.542967079948697e-05, + "loss": 0.5324, + "step": 20950 + }, + { + "epoch": 0.89, + "learning_rate": 5.5408294142796066e-05, + "loss": 0.5969, + "step": 20960 + }, + { + "epoch": 0.89, + "learning_rate": 5.538691748610517e-05, + "loss": 0.5389, + "step": 20970 + }, + { + "epoch": 0.9, + "learning_rate": 5.536554082941429e-05, + "loss": 0.591, + "step": 20980 + }, + { + "epoch": 0.9, + "learning_rate": 5.534416417272339e-05, + "loss": 0.5547, + "step": 20990 + }, + { + "epoch": 0.9, + "learning_rate": 5.532278751603249e-05, + "loss": 0.5378, + "step": 21000 + }, + { + "epoch": 0.9, + "learning_rate": 5.530141085934161e-05, + "loss": 0.5931, + "step": 21010 + }, + { + "epoch": 0.9, + "learning_rate": 5.528003420265071e-05, + "loss": 0.546, + "step": 21020 + }, + { + "epoch": 0.9, + "learning_rate": 5.525865754595981e-05, + "loss": 0.5945, + "step": 21030 + }, + { + "epoch": 0.9, + "learning_rate": 5.5237280889268916e-05, + "loss": 0.5528, + "step": 21040 + }, + { + "epoch": 0.9, + "learning_rate": 5.521590423257803e-05, + "loss": 0.5408, + "step": 21050 + }, + { + "epoch": 0.9, + "learning_rate": 5.519452757588713e-05, + "loss": 0.605, + "step": 21060 + }, + { + "epoch": 0.9, + "learning_rate": 5.5173150919196236e-05, + "loss": 0.5429, + "step": 21070 + }, + { + "epoch": 0.9, + "learning_rate": 5.5151774262505355e-05, + "loss": 0.5868, + "step": 21080 + }, + { + "epoch": 0.9, + "learning_rate": 5.513039760581445e-05, + "loss": 0.5473, + "step": 21090 + }, + { + "epoch": 0.9, + "learning_rate": 5.510902094912356e-05, + "loss": 0.5264, + "step": 21100 + }, + { + "epoch": 0.9, + "learning_rate": 5.508764429243266e-05, + "loss": 0.5953, + "step": 21110 + }, + { + "epoch": 0.9, + "learning_rate": 5.506626763574177e-05, + "loss": 0.5401, + "step": 21120 + }, + { + "epoch": 0.9, + "learning_rate": 5.504489097905088e-05, + "loss": 0.6003, + "step": 21130 + }, + { + "epoch": 0.9, + "learning_rate": 5.502351432235998e-05, + "loss": 0.5497, + "step": 21140 + }, + { + "epoch": 0.9, + "learning_rate": 5.500213766566909e-05, + "loss": 0.523, + "step": 21150 + }, + { + "epoch": 0.9, + "learning_rate": 5.49807610089782e-05, + "loss": 0.5955, + "step": 21160 + }, + { + "epoch": 0.9, + "learning_rate": 5.49593843522873e-05, + "loss": 0.5367, + "step": 21170 + }, + { + "epoch": 0.9, + "learning_rate": 5.493800769559641e-05, + "loss": 0.5901, + "step": 21180 + }, + { + "epoch": 0.9, + "learning_rate": 5.491663103890552e-05, + "loss": 0.5389, + "step": 21190 + }, + { + "epoch": 0.9, + "learning_rate": 5.489525438221462e-05, + "loss": 0.5353, + "step": 21200 + }, + { + "epoch": 0.9, + "learning_rate": 5.487387772552373e-05, + "loss": 0.6045, + "step": 21210 + }, + { + "epoch": 0.91, + "learning_rate": 5.485250106883284e-05, + "loss": 0.5351, + "step": 21220 + }, + { + "epoch": 0.91, + "learning_rate": 5.483112441214194e-05, + "loss": 0.5895, + "step": 21230 + }, + { + "epoch": 0.91, + "learning_rate": 5.480974775545105e-05, + "loss": 0.5514, + "step": 21240 + }, + { + "epoch": 0.91, + "learning_rate": 5.478837109876016e-05, + "loss": 0.5362, + "step": 21250 + }, + { + "epoch": 0.91, + "learning_rate": 5.476699444206926e-05, + "loss": 0.5933, + "step": 21260 + }, + { + "epoch": 0.91, + "learning_rate": 5.474561778537837e-05, + "loss": 0.5358, + "step": 21270 + }, + { + "epoch": 0.91, + "learning_rate": 5.472424112868748e-05, + "loss": 0.5993, + "step": 21280 + }, + { + "epoch": 0.91, + "learning_rate": 5.470286447199658e-05, + "loss": 0.5455, + "step": 21290 + }, + { + "epoch": 0.91, + "learning_rate": 5.468148781530569e-05, + "loss": 0.5417, + "step": 21300 + }, + { + "epoch": 0.91, + "learning_rate": 5.466011115861479e-05, + "loss": 0.6006, + "step": 21310 + }, + { + "epoch": 0.91, + "learning_rate": 5.4638734501923903e-05, + "loss": 0.5393, + "step": 21320 + }, + { + "epoch": 0.91, + "learning_rate": 5.461735784523301e-05, + "loss": 0.5911, + "step": 21330 + }, + { + "epoch": 0.91, + "learning_rate": 5.459598118854211e-05, + "loss": 0.5507, + "step": 21340 + }, + { + "epoch": 0.91, + "learning_rate": 5.4574604531851224e-05, + "loss": 0.5345, + "step": 21350 + }, + { + "epoch": 0.91, + "learning_rate": 5.455322787516033e-05, + "loss": 0.5991, + "step": 21360 + }, + { + "epoch": 0.91, + "learning_rate": 5.453185121846943e-05, + "loss": 0.5205, + "step": 21370 + }, + { + "epoch": 0.91, + "learning_rate": 5.451047456177854e-05, + "loss": 0.6007, + "step": 21380 + }, + { + "epoch": 0.91, + "learning_rate": 5.448909790508765e-05, + "loss": 0.5589, + "step": 21390 + }, + { + "epoch": 0.91, + "learning_rate": 5.446772124839675e-05, + "loss": 0.5331, + "step": 21400 + }, + { + "epoch": 0.91, + "learning_rate": 5.444634459170586e-05, + "loss": 0.6008, + "step": 21410 + }, + { + "epoch": 0.91, + "learning_rate": 5.442496793501497e-05, + "loss": 0.5388, + "step": 21420 + }, + { + "epoch": 0.91, + "learning_rate": 5.440359127832407e-05, + "loss": 0.5969, + "step": 21430 + }, + { + "epoch": 0.91, + "learning_rate": 5.438221462163318e-05, + "loss": 0.5484, + "step": 21440 + }, + { + "epoch": 0.92, + "learning_rate": 5.436083796494229e-05, + "loss": 0.5328, + "step": 21450 + }, + { + "epoch": 0.92, + "learning_rate": 5.4339461308251394e-05, + "loss": 0.5929, + "step": 21460 + }, + { + "epoch": 0.92, + "learning_rate": 5.43180846515605e-05, + "loss": 0.535, + "step": 21470 + }, + { + "epoch": 0.92, + "learning_rate": 5.42967079948696e-05, + "loss": 0.5848, + "step": 21480 + }, + { + "epoch": 0.92, + "learning_rate": 5.4275331338178714e-05, + "loss": 0.5469, + "step": 21490 + }, + { + "epoch": 0.92, + "learning_rate": 5.425395468148782e-05, + "loss": 0.5395, + "step": 21500 + }, + { + "epoch": 0.92, + "learning_rate": 5.423257802479692e-05, + "loss": 0.5943, + "step": 21510 + }, + { + "epoch": 0.92, + "learning_rate": 5.4211201368106034e-05, + "loss": 0.5385, + "step": 21520 + }, + { + "epoch": 0.92, + "learning_rate": 5.418982471141514e-05, + "loss": 0.5847, + "step": 21530 + }, + { + "epoch": 0.92, + "learning_rate": 5.416844805472424e-05, + "loss": 0.5442, + "step": 21540 + }, + { + "epoch": 0.92, + "learning_rate": 5.4147071398033355e-05, + "loss": 0.5322, + "step": 21550 + }, + { + "epoch": 0.92, + "learning_rate": 5.412569474134246e-05, + "loss": 0.6063, + "step": 21560 + }, + { + "epoch": 0.92, + "learning_rate": 5.4104318084651564e-05, + "loss": 0.5256, + "step": 21570 + }, + { + "epoch": 0.92, + "learning_rate": 5.408294142796067e-05, + "loss": 0.5824, + "step": 21580 + }, + { + "epoch": 0.92, + "learning_rate": 5.406156477126978e-05, + "loss": 0.549, + "step": 21590 + }, + { + "epoch": 0.92, + "learning_rate": 5.4040188114578884e-05, + "loss": 0.5375, + "step": 21600 + }, + { + "epoch": 0.92, + "learning_rate": 5.401881145788799e-05, + "loss": 0.6146, + "step": 21610 + }, + { + "epoch": 0.92, + "learning_rate": 5.39974348011971e-05, + "loss": 0.5359, + "step": 21620 + }, + { + "epoch": 0.92, + "learning_rate": 5.3976058144506204e-05, + "loss": 0.5871, + "step": 21630 + }, + { + "epoch": 0.92, + "learning_rate": 5.395468148781531e-05, + "loss": 0.552, + "step": 21640 + }, + { + "epoch": 0.92, + "learning_rate": 5.3933304831124406e-05, + "loss": 0.5365, + "step": 21650 + }, + { + "epoch": 0.92, + "learning_rate": 5.3911928174433525e-05, + "loss": 0.5981, + "step": 21660 + }, + { + "epoch": 0.92, + "learning_rate": 5.389055151774263e-05, + "loss": 0.5403, + "step": 21670 + }, + { + "epoch": 0.92, + "learning_rate": 5.386917486105173e-05, + "loss": 0.5914, + "step": 21680 + }, + { + "epoch": 0.93, + "learning_rate": 5.3847798204360845e-05, + "loss": 0.5402, + "step": 21690 + }, + { + "epoch": 0.93, + "learning_rate": 5.382642154766995e-05, + "loss": 0.5311, + "step": 21700 + }, + { + "epoch": 0.93, + "learning_rate": 5.3805044890979054e-05, + "loss": 0.5993, + "step": 21710 + }, + { + "epoch": 0.93, + "learning_rate": 5.3783668234288165e-05, + "loss": 0.5373, + "step": 21720 + }, + { + "epoch": 0.93, + "learning_rate": 5.376229157759727e-05, + "loss": 0.5993, + "step": 21730 + }, + { + "epoch": 0.93, + "learning_rate": 5.3740914920906374e-05, + "loss": 0.5491, + "step": 21740 + }, + { + "epoch": 0.93, + "learning_rate": 5.371953826421547e-05, + "loss": 0.5345, + "step": 21750 + }, + { + "epoch": 0.93, + "learning_rate": 5.369816160752459e-05, + "loss": 0.597, + "step": 21760 + }, + { + "epoch": 0.93, + "learning_rate": 5.3676784950833695e-05, + "loss": 0.5348, + "step": 21770 + }, + { + "epoch": 0.93, + "learning_rate": 5.365540829414279e-05, + "loss": 0.597, + "step": 21780 + }, + { + "epoch": 0.93, + "learning_rate": 5.363403163745191e-05, + "loss": 0.5489, + "step": 21790 + }, + { + "epoch": 0.93, + "learning_rate": 5.3612654980761015e-05, + "loss": 0.536, + "step": 21800 + }, + { + "epoch": 0.93, + "learning_rate": 5.359127832407011e-05, + "loss": 0.6037, + "step": 21810 + }, + { + "epoch": 0.93, + "learning_rate": 5.356990166737923e-05, + "loss": 0.5439, + "step": 21820 + }, + { + "epoch": 0.93, + "learning_rate": 5.3548525010688335e-05, + "loss": 0.5853, + "step": 21830 + }, + { + "epoch": 0.93, + "learning_rate": 5.352714835399743e-05, + "loss": 0.5537, + "step": 21840 + }, + { + "epoch": 0.93, + "learning_rate": 5.350577169730654e-05, + "loss": 0.5325, + "step": 21850 + }, + { + "epoch": 0.93, + "learning_rate": 5.3484395040615655e-05, + "loss": 0.5928, + "step": 21860 + }, + { + "epoch": 0.93, + "learning_rate": 5.346301838392476e-05, + "loss": 0.5373, + "step": 21870 + }, + { + "epoch": 0.93, + "learning_rate": 5.344164172723386e-05, + "loss": 0.5879, + "step": 21880 + }, + { + "epoch": 0.93, + "learning_rate": 5.3420265070542976e-05, + "loss": 0.5412, + "step": 21890 + }, + { + "epoch": 0.93, + "learning_rate": 5.339888841385208e-05, + "loss": 0.5255, + "step": 21900 + }, + { + "epoch": 0.93, + "learning_rate": 5.337751175716118e-05, + "loss": 0.5921, + "step": 21910 + }, + { + "epoch": 0.94, + "learning_rate": 5.335613510047028e-05, + "loss": 0.5433, + "step": 21920 + }, + { + "epoch": 0.94, + "learning_rate": 5.33347584437794e-05, + "loss": 0.5957, + "step": 21930 + }, + { + "epoch": 0.94, + "learning_rate": 5.33133817870885e-05, + "loss": 0.5426, + "step": 21940 + }, + { + "epoch": 0.94, + "learning_rate": 5.32920051303976e-05, + "loss": 0.5399, + "step": 21950 + }, + { + "epoch": 0.94, + "learning_rate": 5.327062847370672e-05, + "loss": 0.5924, + "step": 21960 + }, + { + "epoch": 0.94, + "learning_rate": 5.324925181701582e-05, + "loss": 0.5414, + "step": 21970 + }, + { + "epoch": 0.94, + "learning_rate": 5.322787516032492e-05, + "loss": 0.5904, + "step": 21980 + }, + { + "epoch": 0.94, + "learning_rate": 5.320649850363404e-05, + "loss": 0.5486, + "step": 21990 + }, + { + "epoch": 0.94, + "learning_rate": 5.318512184694314e-05, + "loss": 0.5334, + "step": 22000 + }, + { + "epoch": 0.94, + "learning_rate": 5.3163745190252243e-05, + "loss": 0.6065, + "step": 22010 + }, + { + "epoch": 0.94, + "learning_rate": 5.314236853356135e-05, + "loss": 0.5367, + "step": 22020 + }, + { + "epoch": 0.94, + "learning_rate": 5.3120991876870466e-05, + "loss": 0.5958, + "step": 22030 + }, + { + "epoch": 0.94, + "learning_rate": 5.3099615220179564e-05, + "loss": 0.5565, + "step": 22040 + }, + { + "epoch": 0.94, + "learning_rate": 5.307823856348867e-05, + "loss": 0.5287, + "step": 22050 + }, + { + "epoch": 0.94, + "learning_rate": 5.3056861906797786e-05, + "loss": 0.6005, + "step": 22060 + }, + { + "epoch": 0.94, + "learning_rate": 5.3035485250106884e-05, + "loss": 0.5422, + "step": 22070 + }, + { + "epoch": 0.94, + "learning_rate": 5.301410859341599e-05, + "loss": 0.5889, + "step": 22080 + }, + { + "epoch": 0.94, + "learning_rate": 5.2992731936725107e-05, + "loss": 0.5483, + "step": 22090 + }, + { + "epoch": 0.94, + "learning_rate": 5.2971355280034204e-05, + "loss": 0.5393, + "step": 22100 + }, + { + "epoch": 0.94, + "learning_rate": 5.294997862334331e-05, + "loss": 0.594, + "step": 22110 + }, + { + "epoch": 0.94, + "learning_rate": 5.292860196665241e-05, + "loss": 0.5376, + "step": 22120 + }, + { + "epoch": 0.94, + "learning_rate": 5.2907225309961525e-05, + "loss": 0.5964, + "step": 22130 + }, + { + "epoch": 0.94, + "learning_rate": 5.288584865327063e-05, + "loss": 0.5449, + "step": 22140 + }, + { + "epoch": 0.94, + "learning_rate": 5.2864471996579734e-05, + "loss": 0.5306, + "step": 22150 + }, + { + "epoch": 0.95, + "learning_rate": 5.284309533988885e-05, + "loss": 0.6048, + "step": 22160 + }, + { + "epoch": 0.95, + "learning_rate": 5.282171868319795e-05, + "loss": 0.5321, + "step": 22170 + }, + { + "epoch": 0.95, + "learning_rate": 5.2800342026507054e-05, + "loss": 0.5871, + "step": 22180 + }, + { + "epoch": 0.95, + "learning_rate": 5.277896536981616e-05, + "loss": 0.5379, + "step": 22190 + }, + { + "epoch": 0.95, + "learning_rate": 5.275758871312527e-05, + "loss": 0.5341, + "step": 22200 + }, + { + "epoch": 0.95, + "learning_rate": 5.2736212056434374e-05, + "loss": 0.596, + "step": 22210 + }, + { + "epoch": 0.95, + "learning_rate": 5.271483539974348e-05, + "loss": 0.536, + "step": 22220 + }, + { + "epoch": 0.95, + "learning_rate": 5.269345874305259e-05, + "loss": 0.5889, + "step": 22230 + }, + { + "epoch": 0.95, + "learning_rate": 5.2672082086361695e-05, + "loss": 0.5507, + "step": 22240 + }, + { + "epoch": 0.95, + "learning_rate": 5.26507054296708e-05, + "loss": 0.5277, + "step": 22250 + }, + { + "epoch": 0.95, + "learning_rate": 5.262932877297991e-05, + "loss": 0.6025, + "step": 22260 + }, + { + "epoch": 0.95, + "learning_rate": 5.2607952116289015e-05, + "loss": 0.5354, + "step": 22270 + }, + { + "epoch": 0.95, + "learning_rate": 5.258657545959812e-05, + "loss": 0.5819, + "step": 22280 + }, + { + "epoch": 0.95, + "learning_rate": 5.2565198802907224e-05, + "loss": 0.5543, + "step": 22290 + }, + { + "epoch": 0.95, + "learning_rate": 5.2543822146216335e-05, + "loss": 0.5331, + "step": 22300 + }, + { + "epoch": 0.95, + "learning_rate": 5.252244548952544e-05, + "loss": 0.6056, + "step": 22310 + }, + { + "epoch": 0.95, + "learning_rate": 5.2501068832834544e-05, + "loss": 0.5403, + "step": 22320 + }, + { + "epoch": 0.95, + "learning_rate": 5.2479692176143656e-05, + "loss": 0.5814, + "step": 22330 + }, + { + "epoch": 0.95, + "learning_rate": 5.245831551945276e-05, + "loss": 0.5445, + "step": 22340 + }, + { + "epoch": 0.95, + "learning_rate": 5.2436938862761865e-05, + "loss": 0.5346, + "step": 22350 + }, + { + "epoch": 0.95, + "learning_rate": 5.2415562206070976e-05, + "loss": 0.6059, + "step": 22360 + }, + { + "epoch": 0.95, + "learning_rate": 5.239418554938008e-05, + "loss": 0.5263, + "step": 22370 + }, + { + "epoch": 0.95, + "learning_rate": 5.2372808892689185e-05, + "loss": 0.5958, + "step": 22380 + }, + { + "epoch": 0.96, + "learning_rate": 5.235143223599829e-05, + "loss": 0.5538, + "step": 22390 + }, + { + "epoch": 0.96, + "learning_rate": 5.23300555793074e-05, + "loss": 0.5383, + "step": 22400 + }, + { + "epoch": 0.96, + "learning_rate": 5.2308678922616505e-05, + "loss": 0.589, + "step": 22410 + }, + { + "epoch": 0.96, + "learning_rate": 5.228730226592561e-05, + "loss": 0.5347, + "step": 22420 + }, + { + "epoch": 0.96, + "learning_rate": 5.226592560923472e-05, + "loss": 0.5917, + "step": 22430 + }, + { + "epoch": 0.96, + "learning_rate": 5.2244548952543825e-05, + "loss": 0.5486, + "step": 22440 + }, + { + "epoch": 0.96, + "learning_rate": 5.222317229585293e-05, + "loss": 0.5283, + "step": 22450 + }, + { + "epoch": 0.96, + "learning_rate": 5.2201795639162034e-05, + "loss": 0.6053, + "step": 22460 + }, + { + "epoch": 0.96, + "learning_rate": 5.2180418982471146e-05, + "loss": 0.5451, + "step": 22470 + }, + { + "epoch": 0.96, + "learning_rate": 5.215904232578025e-05, + "loss": 0.5886, + "step": 22480 + }, + { + "epoch": 0.96, + "learning_rate": 5.2137665669089355e-05, + "loss": 0.5372, + "step": 22490 + }, + { + "epoch": 0.96, + "learning_rate": 5.2116289012398466e-05, + "loss": 0.5437, + "step": 22500 + }, + { + "epoch": 0.96, + "learning_rate": 5.209491235570757e-05, + "loss": 0.6081, + "step": 22510 + }, + { + "epoch": 0.96, + "learning_rate": 5.2073535699016675e-05, + "loss": 0.5416, + "step": 22520 + }, + { + "epoch": 0.96, + "learning_rate": 5.2052159042325786e-05, + "loss": 0.5899, + "step": 22530 + }, + { + "epoch": 0.96, + "learning_rate": 5.203078238563489e-05, + "loss": 0.5455, + "step": 22540 + }, + { + "epoch": 0.96, + "learning_rate": 5.2009405728943995e-05, + "loss": 0.5375, + "step": 22550 + }, + { + "epoch": 0.96, + "learning_rate": 5.19880290722531e-05, + "loss": 0.5954, + "step": 22560 + }, + { + "epoch": 0.96, + "learning_rate": 5.196665241556221e-05, + "loss": 0.535, + "step": 22570 + }, + { + "epoch": 0.96, + "learning_rate": 5.1945275758871316e-05, + "loss": 0.603, + "step": 22580 + }, + { + "epoch": 0.96, + "learning_rate": 5.192389910218042e-05, + "loss": 0.5452, + "step": 22590 + }, + { + "epoch": 0.96, + "learning_rate": 5.190252244548953e-05, + "loss": 0.5336, + "step": 22600 + }, + { + "epoch": 0.96, + "learning_rate": 5.1881145788798636e-05, + "loss": 0.6026, + "step": 22610 + }, + { + "epoch": 0.97, + "learning_rate": 5.185976913210774e-05, + "loss": 0.5373, + "step": 22620 + }, + { + "epoch": 0.97, + "learning_rate": 5.183839247541685e-05, + "loss": 0.5843, + "step": 22630 + }, + { + "epoch": 0.97, + "learning_rate": 5.1817015818725956e-05, + "loss": 0.5488, + "step": 22640 + }, + { + "epoch": 0.97, + "learning_rate": 5.179563916203506e-05, + "loss": 0.5317, + "step": 22650 + }, + { + "epoch": 0.97, + "learning_rate": 5.1774262505344165e-05, + "loss": 0.5959, + "step": 22660 + }, + { + "epoch": 0.97, + "learning_rate": 5.175288584865328e-05, + "loss": 0.5265, + "step": 22670 + }, + { + "epoch": 0.97, + "learning_rate": 5.173150919196238e-05, + "loss": 0.5899, + "step": 22680 + }, + { + "epoch": 0.97, + "learning_rate": 5.1710132535271486e-05, + "loss": 0.5491, + "step": 22690 + }, + { + "epoch": 0.97, + "learning_rate": 5.16887558785806e-05, + "loss": 0.5309, + "step": 22700 + }, + { + "epoch": 0.97, + "learning_rate": 5.16673792218897e-05, + "loss": 0.5978, + "step": 22710 + }, + { + "epoch": 0.97, + "learning_rate": 5.1646002565198806e-05, + "loss": 0.5329, + "step": 22720 + }, + { + "epoch": 0.97, + "learning_rate": 5.1624625908507904e-05, + "loss": 0.5925, + "step": 22730 + }, + { + "epoch": 0.97, + "learning_rate": 5.160324925181702e-05, + "loss": 0.5447, + "step": 22740 + }, + { + "epoch": 0.97, + "learning_rate": 5.1581872595126126e-05, + "loss": 0.5336, + "step": 22750 + }, + { + "epoch": 0.97, + "learning_rate": 5.1560495938435224e-05, + "loss": 0.5929, + "step": 22760 + }, + { + "epoch": 0.97, + "learning_rate": 5.153911928174434e-05, + "loss": 0.542, + "step": 22770 + }, + { + "epoch": 0.97, + "learning_rate": 5.1517742625053447e-05, + "loss": 0.5914, + "step": 22780 + }, + { + "epoch": 0.97, + "learning_rate": 5.1496365968362544e-05, + "loss": 0.5471, + "step": 22790 + }, + { + "epoch": 0.97, + "learning_rate": 5.147498931167166e-05, + "loss": 0.5384, + "step": 22800 + }, + { + "epoch": 0.97, + "learning_rate": 5.145361265498077e-05, + "loss": 0.5976, + "step": 22810 + }, + { + "epoch": 0.97, + "learning_rate": 5.143223599828987e-05, + "loss": 0.5349, + "step": 22820 + }, + { + "epoch": 0.97, + "learning_rate": 5.141085934159897e-05, + "loss": 0.5939, + "step": 22830 + }, + { + "epoch": 0.97, + "learning_rate": 5.138948268490809e-05, + "loss": 0.5451, + "step": 22840 + }, + { + "epoch": 0.97, + "learning_rate": 5.136810602821719e-05, + "loss": 0.529, + "step": 22850 + }, + { + "epoch": 0.98, + "learning_rate": 5.134672937152629e-05, + "loss": 0.5986, + "step": 22860 + }, + { + "epoch": 0.98, + "learning_rate": 5.132535271483541e-05, + "loss": 0.5293, + "step": 22870 + }, + { + "epoch": 0.98, + "learning_rate": 5.130397605814451e-05, + "loss": 0.5804, + "step": 22880 + }, + { + "epoch": 0.98, + "learning_rate": 5.128259940145361e-05, + "loss": 0.5472, + "step": 22890 + }, + { + "epoch": 0.98, + "learning_rate": 5.126122274476273e-05, + "loss": 0.5312, + "step": 22900 + }, + { + "epoch": 0.98, + "learning_rate": 5.123984608807183e-05, + "loss": 0.5972, + "step": 22910 + }, + { + "epoch": 0.98, + "learning_rate": 5.121846943138093e-05, + "loss": 0.5313, + "step": 22920 + }, + { + "epoch": 0.98, + "learning_rate": 5.1197092774690035e-05, + "loss": 0.5808, + "step": 22930 + }, + { + "epoch": 0.98, + "learning_rate": 5.117571611799915e-05, + "loss": 0.5413, + "step": 22940 + }, + { + "epoch": 0.98, + "learning_rate": 5.115433946130826e-05, + "loss": 0.5328, + "step": 22950 + }, + { + "epoch": 0.98, + "learning_rate": 5.1132962804617355e-05, + "loss": 0.604, + "step": 22960 + }, + { + "epoch": 0.98, + "learning_rate": 5.111158614792647e-05, + "loss": 0.5273, + "step": 22970 + }, + { + "epoch": 0.98, + "learning_rate": 5.109020949123558e-05, + "loss": 0.5921, + "step": 22980 + }, + { + "epoch": 0.98, + "learning_rate": 5.1068832834544675e-05, + "loss": 0.5493, + "step": 22990 + }, + { + "epoch": 0.98, + "learning_rate": 5.104745617785378e-05, + "loss": 0.5316, + "step": 23000 + }, + { + "epoch": 0.98, + "learning_rate": 5.10260795211629e-05, + "loss": 0.5947, + "step": 23010 + }, + { + "epoch": 0.98, + "learning_rate": 5.1004702864471996e-05, + "loss": 0.5234, + "step": 23020 + }, + { + "epoch": 0.98, + "learning_rate": 5.09833262077811e-05, + "loss": 0.5956, + "step": 23030 + }, + { + "epoch": 0.98, + "learning_rate": 5.096194955109022e-05, + "loss": 0.536, + "step": 23040 + }, + { + "epoch": 0.98, + "learning_rate": 5.0940572894399316e-05, + "loss": 0.5252, + "step": 23050 + }, + { + "epoch": 0.98, + "learning_rate": 5.091919623770842e-05, + "loss": 0.5896, + "step": 23060 + }, + { + "epoch": 0.98, + "learning_rate": 5.089781958101754e-05, + "loss": 0.5313, + "step": 23070 + }, + { + "epoch": 0.98, + "learning_rate": 5.0876442924326636e-05, + "loss": 0.5927, + "step": 23080 + }, + { + "epoch": 0.99, + "learning_rate": 5.085506626763574e-05, + "loss": 0.5454, + "step": 23090 + }, + { + "epoch": 0.99, + "learning_rate": 5.0833689610944845e-05, + "loss": 0.5203, + "step": 23100 + }, + { + "epoch": 0.99, + "learning_rate": 5.081231295425396e-05, + "loss": 0.5987, + "step": 23110 + }, + { + "epoch": 0.99, + "learning_rate": 5.079093629756306e-05, + "loss": 0.5267, + "step": 23120 + }, + { + "epoch": 0.99, + "learning_rate": 5.0769559640872165e-05, + "loss": 0.5954, + "step": 23130 + }, + { + "epoch": 0.99, + "learning_rate": 5.0748182984181284e-05, + "loss": 0.5473, + "step": 23140 + }, + { + "epoch": 0.99, + "learning_rate": 5.072680632749038e-05, + "loss": 0.5311, + "step": 23150 + }, + { + "epoch": 0.99, + "learning_rate": 5.0705429670799486e-05, + "loss": 0.6048, + "step": 23160 + }, + { + "epoch": 0.99, + "learning_rate": 5.0684053014108604e-05, + "loss": 0.5327, + "step": 23170 + }, + { + "epoch": 0.99, + "learning_rate": 5.06626763574177e-05, + "loss": 0.588, + "step": 23180 + }, + { + "epoch": 0.99, + "learning_rate": 5.0641299700726806e-05, + "loss": 0.5481, + "step": 23190 + }, + { + "epoch": 0.99, + "learning_rate": 5.061992304403591e-05, + "loss": 0.5284, + "step": 23200 + }, + { + "epoch": 0.99, + "learning_rate": 5.059854638734502e-05, + "loss": 0.594, + "step": 23210 + }, + { + "epoch": 0.99, + "learning_rate": 5.0577169730654126e-05, + "loss": 0.5426, + "step": 23220 + }, + { + "epoch": 0.99, + "learning_rate": 5.055579307396323e-05, + "loss": 0.5884, + "step": 23230 + }, + { + "epoch": 0.99, + "learning_rate": 5.053441641727234e-05, + "loss": 0.5538, + "step": 23240 + }, + { + "epoch": 0.99, + "learning_rate": 5.051303976058145e-05, + "loss": 0.5268, + "step": 23250 + }, + { + "epoch": 0.99, + "learning_rate": 5.049166310389055e-05, + "loss": 0.5914, + "step": 23260 + }, + { + "epoch": 0.99, + "learning_rate": 5.0470286447199656e-05, + "loss": 0.5324, + "step": 23270 + }, + { + "epoch": 0.99, + "learning_rate": 5.044890979050877e-05, + "loss": 0.5883, + "step": 23280 + }, + { + "epoch": 0.99, + "learning_rate": 5.042753313381787e-05, + "loss": 0.5476, + "step": 23290 + }, + { + "epoch": 0.99, + "learning_rate": 5.0406156477126976e-05, + "loss": 0.5272, + "step": 23300 + }, + { + "epoch": 0.99, + "learning_rate": 5.038477982043609e-05, + "loss": 0.5893, + "step": 23310 + }, + { + "epoch": 0.99, + "learning_rate": 5.036340316374519e-05, + "loss": 0.5306, + "step": 23320 + }, + { + "epoch": 1.0, + "learning_rate": 5.0342026507054296e-05, + "loss": 0.5917, + "step": 23330 + }, + { + "epoch": 1.0, + "learning_rate": 5.032064985036341e-05, + "loss": 0.5453, + "step": 23340 + }, + { + "epoch": 1.0, + "learning_rate": 5.029927319367251e-05, + "loss": 0.5221, + "step": 23350 + }, + { + "epoch": 1.0, + "learning_rate": 5.027789653698162e-05, + "loss": 0.5987, + "step": 23360 + }, + { + "epoch": 1.0, + "learning_rate": 5.025651988029072e-05, + "loss": 0.5334, + "step": 23370 + }, + { + "epoch": 1.0, + "learning_rate": 5.023514322359983e-05, + "loss": 0.586, + "step": 23380 + }, + { + "epoch": 1.0, + "learning_rate": 5.021376656690894e-05, + "loss": 0.5444, + "step": 23390 + }, + { + "epoch": 1.0, + "learning_rate": 5.019238991021804e-05, + "loss": 0.5238, + "step": 23400 + }, + { + "epoch": 1.0, + "learning_rate": 5.017101325352715e-05, + "loss": 0.5938, + "step": 23410 + }, + { + "epoch": 1.0, + "learning_rate": 5.014963659683626e-05, + "loss": 0.5414, + "step": 23420 + }, + { + "epoch": 1.0, + "learning_rate": 5.012825994014536e-05, + "loss": 0.5831, + "step": 23430 + }, + { + "epoch": 1.0, + "learning_rate": 5.010688328345447e-05, + "loss": 0.5288, + "step": 23440 + }, + { + "epoch": 1.0, + "learning_rate": 5.008550662676358e-05, + "loss": 0.5849, + "step": 23450 + }, + { + "epoch": 1.0, + "learning_rate": 5.006412997007268e-05, + "loss": 0.5317, + "step": 23460 + }, + { + "epoch": 1.0, + "learning_rate": 5.0042753313381787e-05, + "loss": 0.5916, + "step": 23470 + }, + { + "epoch": 1.0, + "learning_rate": 5.00213766566909e-05, + "loss": 0.549, + "step": 23480 + }, + { + "epoch": 1.0, + "learning_rate": 5e-05, + "loss": 0.5417, + "step": 23490 + }, + { + "epoch": 1.0, + "learning_rate": 4.9978623343309114e-05, + "loss": 0.5777, + "step": 23500 + }, + { + "epoch": 1.0, + "learning_rate": 4.995724668661821e-05, + "loss": 0.5373, + "step": 23510 + }, + { + "epoch": 1.0, + "learning_rate": 4.993587002992732e-05, + "loss": 0.598, + "step": 23520 + }, + { + "epoch": 1.0, + "learning_rate": 4.991449337323643e-05, + "loss": 0.5405, + "step": 23530 + }, + { + "epoch": 1.0, + "learning_rate": 4.989311671654553e-05, + "loss": 0.5587, + "step": 23540 + }, + { + "epoch": 1.0, + "learning_rate": 4.987174005985464e-05, + "loss": 0.5787, + "step": 23550 + }, + { + "epoch": 1.01, + "learning_rate": 4.985036340316375e-05, + "loss": 0.5273, + "step": 23560 + }, + { + "epoch": 1.01, + "learning_rate": 4.982898674647286e-05, + "loss": 0.5845, + "step": 23570 + }, + { + "epoch": 1.01, + "learning_rate": 4.9807610089781957e-05, + "loss": 0.5433, + "step": 23580 + }, + { + "epoch": 1.01, + "learning_rate": 4.978623343309107e-05, + "loss": 0.5581, + "step": 23590 + }, + { + "epoch": 1.01, + "learning_rate": 4.976485677640018e-05, + "loss": 0.5769, + "step": 23600 + }, + { + "epoch": 1.01, + "learning_rate": 4.974348011970928e-05, + "loss": 0.5288, + "step": 23610 + }, + { + "epoch": 1.01, + "learning_rate": 4.972210346301839e-05, + "loss": 0.585, + "step": 23620 + }, + { + "epoch": 1.01, + "learning_rate": 4.970072680632749e-05, + "loss": 0.5348, + "step": 23630 + }, + { + "epoch": 1.01, + "learning_rate": 4.96793501496366e-05, + "loss": 0.5482, + "step": 23640 + }, + { + "epoch": 1.01, + "learning_rate": 4.96579734929457e-05, + "loss": 0.583, + "step": 23650 + }, + { + "epoch": 1.01, + "learning_rate": 4.963659683625481e-05, + "loss": 0.5376, + "step": 23660 + }, + { + "epoch": 1.01, + "learning_rate": 4.961522017956392e-05, + "loss": 0.5879, + "step": 23670 + }, + { + "epoch": 1.01, + "learning_rate": 4.959384352287302e-05, + "loss": 0.532, + "step": 23680 + }, + { + "epoch": 1.01, + "learning_rate": 4.957246686618213e-05, + "loss": 0.5575, + "step": 23690 + }, + { + "epoch": 1.01, + "learning_rate": 4.955109020949124e-05, + "loss": 0.5753, + "step": 23700 + }, + { + "epoch": 1.01, + "learning_rate": 4.952971355280034e-05, + "loss": 0.5337, + "step": 23710 + }, + { + "epoch": 1.01, + "learning_rate": 4.9508336896109454e-05, + "loss": 0.5888, + "step": 23720 + }, + { + "epoch": 1.01, + "learning_rate": 4.948696023941856e-05, + "loss": 0.5491, + "step": 23730 + }, + { + "epoch": 1.01, + "learning_rate": 4.946558358272766e-05, + "loss": 0.5513, + "step": 23740 + }, + { + "epoch": 1.01, + "learning_rate": 4.944420692603677e-05, + "loss": 0.5789, + "step": 23750 + }, + { + "epoch": 1.01, + "learning_rate": 4.942283026934588e-05, + "loss": 0.5258, + "step": 23760 + }, + { + "epoch": 1.01, + "learning_rate": 4.940145361265498e-05, + "loss": 0.5841, + "step": 23770 + }, + { + "epoch": 1.01, + "learning_rate": 4.938007695596409e-05, + "loss": 0.5401, + "step": 23780 + }, + { + "epoch": 1.01, + "learning_rate": 4.93587002992732e-05, + "loss": 0.55, + "step": 23790 + }, + { + "epoch": 1.02, + "learning_rate": 4.93373236425823e-05, + "loss": 0.5778, + "step": 23800 + }, + { + "epoch": 1.02, + "learning_rate": 4.931594698589141e-05, + "loss": 0.528, + "step": 23810 + }, + { + "epoch": 1.02, + "learning_rate": 4.929457032920052e-05, + "loss": 0.5883, + "step": 23820 + }, + { + "epoch": 1.02, + "learning_rate": 4.9273193672509624e-05, + "loss": 0.537, + "step": 23830 + }, + { + "epoch": 1.02, + "learning_rate": 4.925181701581873e-05, + "loss": 0.5454, + "step": 23840 + }, + { + "epoch": 1.02, + "learning_rate": 4.923044035912783e-05, + "loss": 0.5789, + "step": 23850 + }, + { + "epoch": 1.02, + "learning_rate": 4.9209063702436944e-05, + "loss": 0.5159, + "step": 23860 + }, + { + "epoch": 1.02, + "learning_rate": 4.918768704574605e-05, + "loss": 0.5968, + "step": 23870 + }, + { + "epoch": 1.02, + "learning_rate": 4.916631038905515e-05, + "loss": 0.5452, + "step": 23880 + }, + { + "epoch": 1.02, + "learning_rate": 4.9144933732364264e-05, + "loss": 0.5529, + "step": 23890 + }, + { + "epoch": 1.02, + "learning_rate": 4.912355707567337e-05, + "loss": 0.58, + "step": 23900 + }, + { + "epoch": 1.02, + "learning_rate": 4.910218041898247e-05, + "loss": 0.5335, + "step": 23910 + }, + { + "epoch": 1.02, + "learning_rate": 4.908080376229158e-05, + "loss": 0.5914, + "step": 23920 + }, + { + "epoch": 1.02, + "learning_rate": 4.905942710560069e-05, + "loss": 0.5343, + "step": 23930 + }, + { + "epoch": 1.02, + "learning_rate": 4.9038050448909793e-05, + "loss": 0.5404, + "step": 23940 + }, + { + "epoch": 1.02, + "learning_rate": 4.90166737922189e-05, + "loss": 0.5808, + "step": 23950 + }, + { + "epoch": 1.02, + "learning_rate": 4.899529713552801e-05, + "loss": 0.5242, + "step": 23960 + }, + { + "epoch": 1.02, + "learning_rate": 4.897392047883711e-05, + "loss": 0.5949, + "step": 23970 + }, + { + "epoch": 1.02, + "learning_rate": 4.895254382214622e-05, + "loss": 0.5383, + "step": 23980 + }, + { + "epoch": 1.02, + "learning_rate": 4.893116716545533e-05, + "loss": 0.5473, + "step": 23990 + }, + { + "epoch": 1.02, + "learning_rate": 4.890979050876443e-05, + "loss": 0.5743, + "step": 24000 + }, + { + "epoch": 1.02, + "learning_rate": 4.888841385207354e-05, + "loss": 0.5273, + "step": 24010 + }, + { + "epoch": 1.02, + "learning_rate": 4.886703719538264e-05, + "loss": 0.5894, + "step": 24020 + }, + { + "epoch": 1.03, + "learning_rate": 4.884566053869175e-05, + "loss": 0.5348, + "step": 24030 + }, + { + "epoch": 1.03, + "learning_rate": 4.882428388200086e-05, + "loss": 0.5482, + "step": 24040 + }, + { + "epoch": 1.03, + "learning_rate": 4.8802907225309963e-05, + "loss": 0.5778, + "step": 24050 + }, + { + "epoch": 1.03, + "learning_rate": 4.8781530568619075e-05, + "loss": 0.5201, + "step": 24060 + }, + { + "epoch": 1.03, + "learning_rate": 4.876015391192817e-05, + "loss": 0.593, + "step": 24070 + }, + { + "epoch": 1.03, + "learning_rate": 4.8738777255237284e-05, + "loss": 0.5403, + "step": 24080 + }, + { + "epoch": 1.03, + "learning_rate": 4.8717400598546395e-05, + "loss": 0.5416, + "step": 24090 + }, + { + "epoch": 1.03, + "learning_rate": 4.869602394185549e-05, + "loss": 0.5837, + "step": 24100 + }, + { + "epoch": 1.03, + "learning_rate": 4.8674647285164604e-05, + "loss": 0.5294, + "step": 24110 + }, + { + "epoch": 1.03, + "learning_rate": 4.865327062847371e-05, + "loss": 0.5952, + "step": 24120 + }, + { + "epoch": 1.03, + "learning_rate": 4.863189397178281e-05, + "loss": 0.5371, + "step": 24130 + }, + { + "epoch": 1.03, + "learning_rate": 4.8610517315091924e-05, + "loss": 0.546, + "step": 24140 + }, + { + "epoch": 1.03, + "learning_rate": 4.858914065840103e-05, + "loss": 0.5801, + "step": 24150 + }, + { + "epoch": 1.03, + "learning_rate": 4.856776400171013e-05, + "loss": 0.5211, + "step": 24160 + }, + { + "epoch": 1.03, + "learning_rate": 4.854638734501924e-05, + "loss": 0.5887, + "step": 24170 + }, + { + "epoch": 1.03, + "learning_rate": 4.852501068832835e-05, + "loss": 0.5422, + "step": 24180 + }, + { + "epoch": 1.03, + "learning_rate": 4.8503634031637454e-05, + "loss": 0.5523, + "step": 24190 + }, + { + "epoch": 1.03, + "learning_rate": 4.848225737494656e-05, + "loss": 0.5763, + "step": 24200 + }, + { + "epoch": 1.03, + "learning_rate": 4.846088071825567e-05, + "loss": 0.527, + "step": 24210 + }, + { + "epoch": 1.03, + "learning_rate": 4.8439504061564774e-05, + "loss": 0.5969, + "step": 24220 + }, + { + "epoch": 1.03, + "learning_rate": 4.841812740487388e-05, + "loss": 0.5448, + "step": 24230 + }, + { + "epoch": 1.03, + "learning_rate": 4.839675074818298e-05, + "loss": 0.548, + "step": 24240 + }, + { + "epoch": 1.03, + "learning_rate": 4.8375374091492094e-05, + "loss": 0.5769, + "step": 24250 + }, + { + "epoch": 1.03, + "learning_rate": 4.83539974348012e-05, + "loss": 0.5333, + "step": 24260 + }, + { + "epoch": 1.04, + "learning_rate": 4.83326207781103e-05, + "loss": 0.5889, + "step": 24270 + }, + { + "epoch": 1.04, + "learning_rate": 4.8311244121419415e-05, + "loss": 0.5393, + "step": 24280 + }, + { + "epoch": 1.04, + "learning_rate": 4.828986746472852e-05, + "loss": 0.5529, + "step": 24290 + }, + { + "epoch": 1.04, + "learning_rate": 4.8268490808037624e-05, + "loss": 0.5698, + "step": 24300 + }, + { + "epoch": 1.04, + "learning_rate": 4.8247114151346735e-05, + "loss": 0.5255, + "step": 24310 + }, + { + "epoch": 1.04, + "learning_rate": 4.822573749465584e-05, + "loss": 0.5868, + "step": 24320 + }, + { + "epoch": 1.04, + "learning_rate": 4.8204360837964944e-05, + "loss": 0.5432, + "step": 24330 + }, + { + "epoch": 1.04, + "learning_rate": 4.818298418127405e-05, + "loss": 0.5575, + "step": 24340 + }, + { + "epoch": 1.04, + "learning_rate": 4.816160752458316e-05, + "loss": 0.575, + "step": 24350 + }, + { + "epoch": 1.04, + "learning_rate": 4.8140230867892264e-05, + "loss": 0.5231, + "step": 24360 + }, + { + "epoch": 1.04, + "learning_rate": 4.811885421120137e-05, + "loss": 0.5865, + "step": 24370 + }, + { + "epoch": 1.04, + "learning_rate": 4.809747755451048e-05, + "loss": 0.5369, + "step": 24380 + }, + { + "epoch": 1.04, + "learning_rate": 4.8076100897819585e-05, + "loss": 0.5546, + "step": 24390 + }, + { + "epoch": 1.04, + "learning_rate": 4.805472424112869e-05, + "loss": 0.5707, + "step": 24400 + }, + { + "epoch": 1.04, + "learning_rate": 4.80333475844378e-05, + "loss": 0.5367, + "step": 24410 + }, + { + "epoch": 1.04, + "learning_rate": 4.8011970927746905e-05, + "loss": 0.5872, + "step": 24420 + }, + { + "epoch": 1.04, + "learning_rate": 4.799059427105601e-05, + "loss": 0.544, + "step": 24430 + }, + { + "epoch": 1.04, + "learning_rate": 4.7969217614365114e-05, + "loss": 0.5423, + "step": 24440 + }, + { + "epoch": 1.04, + "learning_rate": 4.7947840957674225e-05, + "loss": 0.5802, + "step": 24450 + }, + { + "epoch": 1.04, + "learning_rate": 4.792646430098332e-05, + "loss": 0.5309, + "step": 24460 + }, + { + "epoch": 1.04, + "learning_rate": 4.7905087644292434e-05, + "loss": 0.5887, + "step": 24470 + }, + { + "epoch": 1.04, + "learning_rate": 4.7883710987601545e-05, + "loss": 0.5405, + "step": 24480 + }, + { + "epoch": 1.04, + "learning_rate": 4.786233433091064e-05, + "loss": 0.5565, + "step": 24490 + }, + { + "epoch": 1.05, + "learning_rate": 4.7840957674219754e-05, + "loss": 0.5757, + "step": 24500 + }, + { + "epoch": 1.05, + "learning_rate": 4.781958101752886e-05, + "loss": 0.533, + "step": 24510 + }, + { + "epoch": 1.05, + "learning_rate": 4.779820436083797e-05, + "loss": 0.589, + "step": 24520 + }, + { + "epoch": 1.05, + "learning_rate": 4.7776827704147075e-05, + "loss": 0.5461, + "step": 24530 + }, + { + "epoch": 1.05, + "learning_rate": 4.775545104745618e-05, + "loss": 0.5465, + "step": 24540 + }, + { + "epoch": 1.05, + "learning_rate": 4.773407439076529e-05, + "loss": 0.5839, + "step": 24550 + }, + { + "epoch": 1.05, + "learning_rate": 4.771269773407439e-05, + "loss": 0.517, + "step": 24560 + }, + { + "epoch": 1.05, + "learning_rate": 4.76913210773835e-05, + "loss": 0.5962, + "step": 24570 + }, + { + "epoch": 1.05, + "learning_rate": 4.766994442069261e-05, + "loss": 0.5357, + "step": 24580 + }, + { + "epoch": 1.05, + "learning_rate": 4.764856776400171e-05, + "loss": 0.5511, + "step": 24590 + }, + { + "epoch": 1.05, + "learning_rate": 4.762719110731082e-05, + "loss": 0.581, + "step": 24600 + }, + { + "epoch": 1.05, + "learning_rate": 4.7605814450619924e-05, + "loss": 0.5222, + "step": 24610 + }, + { + "epoch": 1.05, + "learning_rate": 4.758443779392903e-05, + "loss": 0.5949, + "step": 24620 + }, + { + "epoch": 1.05, + "learning_rate": 4.756306113723814e-05, + "loss": 0.535, + "step": 24630 + }, + { + "epoch": 1.05, + "learning_rate": 4.7541684480547245e-05, + "loss": 0.5442, + "step": 24640 + }, + { + "epoch": 1.05, + "learning_rate": 4.752030782385635e-05, + "loss": 0.567, + "step": 24650 + }, + { + "epoch": 1.05, + "learning_rate": 4.7498931167165454e-05, + "loss": 0.5259, + "step": 24660 + }, + { + "epoch": 1.05, + "learning_rate": 4.7477554510474565e-05, + "loss": 0.5927, + "step": 24670 + }, + { + "epoch": 1.05, + "learning_rate": 4.7456177853783676e-05, + "loss": 0.5418, + "step": 24680 + }, + { + "epoch": 1.05, + "learning_rate": 4.7434801197092774e-05, + "loss": 0.5488, + "step": 24690 + }, + { + "epoch": 1.05, + "learning_rate": 4.7413424540401885e-05, + "loss": 0.577, + "step": 24700 + }, + { + "epoch": 1.05, + "learning_rate": 4.739204788371099e-05, + "loss": 0.533, + "step": 24710 + }, + { + "epoch": 1.05, + "learning_rate": 4.7370671227020094e-05, + "loss": 0.5874, + "step": 24720 + }, + { + "epoch": 1.06, + "learning_rate": 4.73492945703292e-05, + "loss": 0.5453, + "step": 24730 + }, + { + "epoch": 1.06, + "learning_rate": 4.732791791363831e-05, + "loss": 0.5532, + "step": 24740 + }, + { + "epoch": 1.06, + "learning_rate": 4.7306541256947415e-05, + "loss": 0.5677, + "step": 24750 + }, + { + "epoch": 1.06, + "learning_rate": 4.728516460025652e-05, + "loss": 0.5225, + "step": 24760 + }, + { + "epoch": 1.06, + "learning_rate": 4.726378794356563e-05, + "loss": 0.5891, + "step": 24770 + }, + { + "epoch": 1.06, + "learning_rate": 4.7242411286874735e-05, + "loss": 0.5469, + "step": 24780 + }, + { + "epoch": 1.06, + "learning_rate": 4.722103463018384e-05, + "loss": 0.5421, + "step": 24790 + }, + { + "epoch": 1.06, + "learning_rate": 4.719965797349295e-05, + "loss": 0.5721, + "step": 24800 + }, + { + "epoch": 1.06, + "learning_rate": 4.7178281316802055e-05, + "loss": 0.5333, + "step": 24810 + }, + { + "epoch": 1.06, + "learning_rate": 4.715690466011116e-05, + "loss": 0.5845, + "step": 24820 + }, + { + "epoch": 1.06, + "learning_rate": 4.7135528003420264e-05, + "loss": 0.5392, + "step": 24830 + }, + { + "epoch": 1.06, + "learning_rate": 4.7114151346729376e-05, + "loss": 0.5587, + "step": 24840 + }, + { + "epoch": 1.06, + "learning_rate": 4.709277469003848e-05, + "loss": 0.5754, + "step": 24850 + }, + { + "epoch": 1.06, + "learning_rate": 4.7071398033347585e-05, + "loss": 0.5245, + "step": 24860 + }, + { + "epoch": 1.06, + "learning_rate": 4.7050021376656696e-05, + "loss": 0.5847, + "step": 24870 + }, + { + "epoch": 1.06, + "learning_rate": 4.70286447199658e-05, + "loss": 0.537, + "step": 24880 + }, + { + "epoch": 1.06, + "learning_rate": 4.7007268063274905e-05, + "loss": 0.5571, + "step": 24890 + }, + { + "epoch": 1.06, + "learning_rate": 4.6985891406584016e-05, + "loss": 0.5724, + "step": 24900 + }, + { + "epoch": 1.06, + "learning_rate": 4.696451474989312e-05, + "loss": 0.5285, + "step": 24910 + }, + { + "epoch": 1.06, + "learning_rate": 4.6943138093202225e-05, + "loss": 0.5838, + "step": 24920 + }, + { + "epoch": 1.06, + "learning_rate": 4.692176143651133e-05, + "loss": 0.5317, + "step": 24930 + }, + { + "epoch": 1.06, + "learning_rate": 4.690038477982044e-05, + "loss": 0.5509, + "step": 24940 + }, + { + "epoch": 1.06, + "learning_rate": 4.6879008123129546e-05, + "loss": 0.574, + "step": 24950 + }, + { + "epoch": 1.06, + "learning_rate": 4.685763146643865e-05, + "loss": 0.5316, + "step": 24960 + }, + { + "epoch": 1.07, + "learning_rate": 4.683625480974776e-05, + "loss": 0.5902, + "step": 24970 + }, + { + "epoch": 1.07, + "learning_rate": 4.6814878153056866e-05, + "loss": 0.5352, + "step": 24980 + }, + { + "epoch": 1.07, + "learning_rate": 4.679350149636597e-05, + "loss": 0.5433, + "step": 24990 + }, + { + "epoch": 1.07, + "learning_rate": 4.6772124839675075e-05, + "loss": 0.5714, + "step": 25000 + }, + { + "epoch": 1.07, + "learning_rate": 4.6750748182984186e-05, + "loss": 0.5218, + "step": 25010 + }, + { + "epoch": 1.07, + "learning_rate": 4.672937152629329e-05, + "loss": 0.5883, + "step": 25020 + }, + { + "epoch": 1.07, + "learning_rate": 4.6707994869602395e-05, + "loss": 0.5308, + "step": 25030 + }, + { + "epoch": 1.07, + "learning_rate": 4.6686618212911506e-05, + "loss": 0.5534, + "step": 25040 + }, + { + "epoch": 1.07, + "learning_rate": 4.6665241556220604e-05, + "loss": 0.5727, + "step": 25050 + }, + { + "epoch": 1.07, + "learning_rate": 4.6643864899529716e-05, + "loss": 0.5192, + "step": 25060 + }, + { + "epoch": 1.07, + "learning_rate": 4.662248824283883e-05, + "loss": 0.592, + "step": 25070 + }, + { + "epoch": 1.07, + "learning_rate": 4.6601111586147925e-05, + "loss": 0.5351, + "step": 25080 + }, + { + "epoch": 1.07, + "learning_rate": 4.6579734929457036e-05, + "loss": 0.5477, + "step": 25090 + }, + { + "epoch": 1.07, + "learning_rate": 4.655835827276614e-05, + "loss": 0.5653, + "step": 25100 + }, + { + "epoch": 1.07, + "learning_rate": 4.6536981616075245e-05, + "loss": 0.5237, + "step": 25110 + }, + { + "epoch": 1.07, + "learning_rate": 4.6515604959384356e-05, + "loss": 0.5817, + "step": 25120 + }, + { + "epoch": 1.07, + "learning_rate": 4.649422830269346e-05, + "loss": 0.538, + "step": 25130 + }, + { + "epoch": 1.07, + "learning_rate": 4.647285164600257e-05, + "loss": 0.542, + "step": 25140 + }, + { + "epoch": 1.07, + "learning_rate": 4.645147498931167e-05, + "loss": 0.5679, + "step": 25150 + }, + { + "epoch": 1.07, + "learning_rate": 4.643009833262078e-05, + "loss": 0.5198, + "step": 25160 + }, + { + "epoch": 1.07, + "learning_rate": 4.640872167592989e-05, + "loss": 0.5858, + "step": 25170 + }, + { + "epoch": 1.07, + "learning_rate": 4.638734501923899e-05, + "loss": 0.5407, + "step": 25180 + }, + { + "epoch": 1.07, + "learning_rate": 4.63659683625481e-05, + "loss": 0.5444, + "step": 25190 + }, + { + "epoch": 1.08, + "learning_rate": 4.6344591705857206e-05, + "loss": 0.5826, + "step": 25200 + }, + { + "epoch": 1.08, + "learning_rate": 4.632321504916631e-05, + "loss": 0.5236, + "step": 25210 + }, + { + "epoch": 1.08, + "learning_rate": 4.630183839247542e-05, + "loss": 0.5757, + "step": 25220 + }, + { + "epoch": 1.08, + "learning_rate": 4.6280461735784526e-05, + "loss": 0.5303, + "step": 25230 + }, + { + "epoch": 1.08, + "learning_rate": 4.625908507909363e-05, + "loss": 0.5476, + "step": 25240 + }, + { + "epoch": 1.08, + "learning_rate": 4.6237708422402735e-05, + "loss": 0.5724, + "step": 25250 + }, + { + "epoch": 1.08, + "learning_rate": 4.6216331765711846e-05, + "loss": 0.5213, + "step": 25260 + }, + { + "epoch": 1.08, + "learning_rate": 4.619495510902095e-05, + "loss": 0.5897, + "step": 25270 + }, + { + "epoch": 1.08, + "learning_rate": 4.6173578452330055e-05, + "loss": 0.537, + "step": 25280 + }, + { + "epoch": 1.08, + "learning_rate": 4.615220179563917e-05, + "loss": 0.5486, + "step": 25290 + }, + { + "epoch": 1.08, + "learning_rate": 4.613082513894827e-05, + "loss": 0.5758, + "step": 25300 + }, + { + "epoch": 1.08, + "learning_rate": 4.6109448482257376e-05, + "loss": 0.5252, + "step": 25310 + }, + { + "epoch": 1.08, + "learning_rate": 4.608807182556648e-05, + "loss": 0.5948, + "step": 25320 + }, + { + "epoch": 1.08, + "learning_rate": 4.606669516887559e-05, + "loss": 0.5435, + "step": 25330 + }, + { + "epoch": 1.08, + "learning_rate": 4.6045318512184696e-05, + "loss": 0.546, + "step": 25340 + }, + { + "epoch": 1.08, + "learning_rate": 4.60239418554938e-05, + "loss": 0.5653, + "step": 25350 + }, + { + "epoch": 1.08, + "learning_rate": 4.600256519880291e-05, + "loss": 0.5221, + "step": 25360 + }, + { + "epoch": 1.08, + "learning_rate": 4.5981188542112016e-05, + "loss": 0.5843, + "step": 25370 + }, + { + "epoch": 1.08, + "learning_rate": 4.595981188542112e-05, + "loss": 0.5438, + "step": 25380 + }, + { + "epoch": 1.08, + "learning_rate": 4.593843522873023e-05, + "loss": 0.5431, + "step": 25390 + }, + { + "epoch": 1.08, + "learning_rate": 4.591705857203934e-05, + "loss": 0.5682, + "step": 25400 + }, + { + "epoch": 1.08, + "learning_rate": 4.589568191534844e-05, + "loss": 0.5266, + "step": 25410 + }, + { + "epoch": 1.08, + "learning_rate": 4.5874305258657546e-05, + "loss": 0.5849, + "step": 25420 + }, + { + "epoch": 1.08, + "learning_rate": 4.585292860196666e-05, + "loss": 0.5282, + "step": 25430 + }, + { + "epoch": 1.09, + "learning_rate": 4.583155194527576e-05, + "loss": 0.5434, + "step": 25440 + }, + { + "epoch": 1.09, + "learning_rate": 4.5810175288584866e-05, + "loss": 0.5651, + "step": 25450 + }, + { + "epoch": 1.09, + "learning_rate": 4.578879863189398e-05, + "loss": 0.5248, + "step": 25460 + }, + { + "epoch": 1.09, + "learning_rate": 4.576742197520308e-05, + "loss": 0.5971, + "step": 25470 + }, + { + "epoch": 1.09, + "learning_rate": 4.5746045318512186e-05, + "loss": 0.5333, + "step": 25480 + }, + { + "epoch": 1.09, + "learning_rate": 4.57246686618213e-05, + "loss": 0.553, + "step": 25490 + }, + { + "epoch": 1.09, + "learning_rate": 4.57032920051304e-05, + "loss": 0.5732, + "step": 25500 + }, + { + "epoch": 1.09, + "learning_rate": 4.5681915348439507e-05, + "loss": 0.5294, + "step": 25510 + }, + { + "epoch": 1.09, + "learning_rate": 4.566053869174861e-05, + "loss": 0.5921, + "step": 25520 + }, + { + "epoch": 1.09, + "learning_rate": 4.563916203505772e-05, + "loss": 0.5386, + "step": 25530 + }, + { + "epoch": 1.09, + "learning_rate": 4.561778537836682e-05, + "loss": 0.5424, + "step": 25540 + }, + { + "epoch": 1.09, + "learning_rate": 4.559640872167593e-05, + "loss": 0.5693, + "step": 25550 + }, + { + "epoch": 1.09, + "learning_rate": 4.557503206498504e-05, + "loss": 0.5183, + "step": 25560 + }, + { + "epoch": 1.09, + "learning_rate": 4.555365540829414e-05, + "loss": 0.5889, + "step": 25570 + }, + { + "epoch": 1.09, + "learning_rate": 4.553227875160325e-05, + "loss": 0.5398, + "step": 25580 + }, + { + "epoch": 1.09, + "learning_rate": 4.5510902094912356e-05, + "loss": 0.5508, + "step": 25590 + }, + { + "epoch": 1.09, + "learning_rate": 4.548952543822147e-05, + "loss": 0.572, + "step": 25600 + }, + { + "epoch": 1.09, + "learning_rate": 4.546814878153057e-05, + "loss": 0.5295, + "step": 25610 + }, + { + "epoch": 1.09, + "learning_rate": 4.5446772124839677e-05, + "loss": 0.5871, + "step": 25620 + }, + { + "epoch": 1.09, + "learning_rate": 4.542539546814879e-05, + "loss": 0.5449, + "step": 25630 + }, + { + "epoch": 1.09, + "learning_rate": 4.5404018811457886e-05, + "loss": 0.5512, + "step": 25640 + }, + { + "epoch": 1.09, + "learning_rate": 4.5382642154767e-05, + "loss": 0.5712, + "step": 25650 + }, + { + "epoch": 1.09, + "learning_rate": 4.536126549807611e-05, + "loss": 0.5219, + "step": 25660 + }, + { + "epoch": 1.1, + "learning_rate": 4.5339888841385206e-05, + "loss": 0.5876, + "step": 25670 + }, + { + "epoch": 1.1, + "learning_rate": 4.531851218469432e-05, + "loss": 0.545, + "step": 25680 + }, + { + "epoch": 1.1, + "learning_rate": 4.529713552800342e-05, + "loss": 0.5442, + "step": 25690 + }, + { + "epoch": 1.1, + "learning_rate": 4.5275758871312526e-05, + "loss": 0.5719, + "step": 25700 + }, + { + "epoch": 1.1, + "learning_rate": 4.525438221462164e-05, + "loss": 0.528, + "step": 25710 + }, + { + "epoch": 1.1, + "learning_rate": 4.523300555793074e-05, + "loss": 0.589, + "step": 25720 + }, + { + "epoch": 1.1, + "learning_rate": 4.5211628901239846e-05, + "loss": 0.5308, + "step": 25730 + }, + { + "epoch": 1.1, + "learning_rate": 4.519025224454895e-05, + "loss": 0.5585, + "step": 25740 + }, + { + "epoch": 1.1, + "learning_rate": 4.516887558785806e-05, + "loss": 0.5839, + "step": 25750 + }, + { + "epoch": 1.1, + "learning_rate": 4.5147498931167174e-05, + "loss": 0.5289, + "step": 25760 + }, + { + "epoch": 1.1, + "learning_rate": 4.512612227447627e-05, + "loss": 0.5823, + "step": 25770 + }, + { + "epoch": 1.1, + "learning_rate": 4.510474561778538e-05, + "loss": 0.5308, + "step": 25780 + }, + { + "epoch": 1.1, + "learning_rate": 4.508336896109449e-05, + "loss": 0.5367, + "step": 25790 + }, + { + "epoch": 1.1, + "learning_rate": 4.506199230440359e-05, + "loss": 0.5829, + "step": 25800 + }, + { + "epoch": 1.1, + "learning_rate": 4.5040615647712696e-05, + "loss": 0.5231, + "step": 25810 + }, + { + "epoch": 1.1, + "learning_rate": 4.501923899102181e-05, + "loss": 0.5893, + "step": 25820 + }, + { + "epoch": 1.1, + "learning_rate": 4.499786233433091e-05, + "loss": 0.537, + "step": 25830 + }, + { + "epoch": 1.1, + "learning_rate": 4.4976485677640016e-05, + "loss": 0.5455, + "step": 25840 + }, + { + "epoch": 1.1, + "learning_rate": 4.495510902094913e-05, + "loss": 0.5768, + "step": 25850 + }, + { + "epoch": 1.1, + "learning_rate": 4.493373236425823e-05, + "loss": 0.524, + "step": 25860 + }, + { + "epoch": 1.1, + "learning_rate": 4.491235570756734e-05, + "loss": 0.5896, + "step": 25870 + }, + { + "epoch": 1.1, + "learning_rate": 4.489097905087645e-05, + "loss": 0.5389, + "step": 25880 + }, + { + "epoch": 1.1, + "learning_rate": 4.486960239418555e-05, + "loss": 0.5403, + "step": 25890 + }, + { + "epoch": 1.1, + "learning_rate": 4.484822573749466e-05, + "loss": 0.5707, + "step": 25900 + }, + { + "epoch": 1.11, + "learning_rate": 4.482684908080376e-05, + "loss": 0.5161, + "step": 25910 + }, + { + "epoch": 1.11, + "learning_rate": 4.480547242411287e-05, + "loss": 0.5924, + "step": 25920 + }, + { + "epoch": 1.11, + "learning_rate": 4.478409576742198e-05, + "loss": 0.5455, + "step": 25930 + }, + { + "epoch": 1.11, + "learning_rate": 4.476271911073108e-05, + "loss": 0.5506, + "step": 25940 + }, + { + "epoch": 1.11, + "learning_rate": 4.474134245404019e-05, + "loss": 0.5725, + "step": 25950 + }, + { + "epoch": 1.11, + "learning_rate": 4.47199657973493e-05, + "loss": 0.529, + "step": 25960 + }, + { + "epoch": 1.11, + "learning_rate": 4.46985891406584e-05, + "loss": 0.5901, + "step": 25970 + }, + { + "epoch": 1.11, + "learning_rate": 4.4677212483967513e-05, + "loss": 0.5406, + "step": 25980 + }, + { + "epoch": 1.11, + "learning_rate": 4.465583582727662e-05, + "loss": 0.5513, + "step": 25990 + }, + { + "epoch": 1.11, + "learning_rate": 4.463445917058572e-05, + "loss": 0.5732, + "step": 26000 + }, + { + "epoch": 1.11, + "learning_rate": 4.461308251389483e-05, + "loss": 0.5324, + "step": 26010 + }, + { + "epoch": 1.11, + "learning_rate": 4.459170585720394e-05, + "loss": 0.5808, + "step": 26020 + }, + { + "epoch": 1.11, + "learning_rate": 4.457032920051304e-05, + "loss": 0.5364, + "step": 26030 + }, + { + "epoch": 1.11, + "learning_rate": 4.454895254382215e-05, + "loss": 0.5452, + "step": 26040 + }, + { + "epoch": 1.11, + "learning_rate": 4.452757588713126e-05, + "loss": 0.5685, + "step": 26050 + }, + { + "epoch": 1.11, + "learning_rate": 4.4506199230440356e-05, + "loss": 0.5225, + "step": 26060 + }, + { + "epoch": 1.11, + "learning_rate": 4.448482257374947e-05, + "loss": 0.5888, + "step": 26070 + }, + { + "epoch": 1.11, + "learning_rate": 4.446344591705857e-05, + "loss": 0.5358, + "step": 26080 + }, + { + "epoch": 1.11, + "learning_rate": 4.4442069260367683e-05, + "loss": 0.5433, + "step": 26090 + }, + { + "epoch": 1.11, + "learning_rate": 4.442069260367679e-05, + "loss": 0.5739, + "step": 26100 + }, + { + "epoch": 1.11, + "learning_rate": 4.439931594698589e-05, + "loss": 0.5268, + "step": 26110 + }, + { + "epoch": 1.11, + "learning_rate": 4.4377939290295004e-05, + "loss": 0.589, + "step": 26120 + }, + { + "epoch": 1.11, + "learning_rate": 4.43565626336041e-05, + "loss": 0.5357, + "step": 26130 + }, + { + "epoch": 1.12, + "learning_rate": 4.433518597691321e-05, + "loss": 0.5451, + "step": 26140 + }, + { + "epoch": 1.12, + "learning_rate": 4.4313809320222324e-05, + "loss": 0.5656, + "step": 26150 + }, + { + "epoch": 1.12, + "learning_rate": 4.429243266353142e-05, + "loss": 0.5108, + "step": 26160 + }, + { + "epoch": 1.12, + "learning_rate": 4.427105600684053e-05, + "loss": 0.5869, + "step": 26170 + }, + { + "epoch": 1.12, + "learning_rate": 4.424967935014964e-05, + "loss": 0.5323, + "step": 26180 + }, + { + "epoch": 1.12, + "learning_rate": 4.422830269345874e-05, + "loss": 0.5413, + "step": 26190 + }, + { + "epoch": 1.12, + "learning_rate": 4.420692603676785e-05, + "loss": 0.5806, + "step": 26200 + }, + { + "epoch": 1.12, + "learning_rate": 4.418554938007696e-05, + "loss": 0.522, + "step": 26210 + }, + { + "epoch": 1.12, + "learning_rate": 4.416417272338607e-05, + "loss": 0.5835, + "step": 26220 + }, + { + "epoch": 1.12, + "learning_rate": 4.414279606669517e-05, + "loss": 0.5362, + "step": 26230 + }, + { + "epoch": 1.12, + "learning_rate": 4.412141941000428e-05, + "loss": 0.5523, + "step": 26240 + }, + { + "epoch": 1.12, + "learning_rate": 4.410004275331339e-05, + "loss": 0.5596, + "step": 26250 + }, + { + "epoch": 1.12, + "learning_rate": 4.407866609662249e-05, + "loss": 0.5192, + "step": 26260 + }, + { + "epoch": 1.12, + "learning_rate": 4.40572894399316e-05, + "loss": 0.5905, + "step": 26270 + }, + { + "epoch": 1.12, + "learning_rate": 4.40359127832407e-05, + "loss": 0.5376, + "step": 26280 + }, + { + "epoch": 1.12, + "learning_rate": 4.401453612654981e-05, + "loss": 0.5393, + "step": 26290 + }, + { + "epoch": 1.12, + "learning_rate": 4.399315946985892e-05, + "loss": 0.5745, + "step": 26300 + }, + { + "epoch": 1.12, + "learning_rate": 4.397178281316802e-05, + "loss": 0.523, + "step": 26310 + }, + { + "epoch": 1.12, + "learning_rate": 4.395040615647713e-05, + "loss": 0.5895, + "step": 26320 + }, + { + "epoch": 1.12, + "learning_rate": 4.392902949978623e-05, + "loss": 0.5351, + "step": 26330 + }, + { + "epoch": 1.12, + "learning_rate": 4.3907652843095344e-05, + "loss": 0.5443, + "step": 26340 + }, + { + "epoch": 1.12, + "learning_rate": 4.388627618640445e-05, + "loss": 0.5748, + "step": 26350 + }, + { + "epoch": 1.12, + "learning_rate": 4.386489952971355e-05, + "loss": 0.521, + "step": 26360 + }, + { + "epoch": 1.12, + "learning_rate": 4.3843522873022664e-05, + "loss": 0.5919, + "step": 26370 + }, + { + "epoch": 1.13, + "learning_rate": 4.382214621633177e-05, + "loss": 0.5437, + "step": 26380 + }, + { + "epoch": 1.13, + "learning_rate": 4.380076955964087e-05, + "loss": 0.5418, + "step": 26390 + }, + { + "epoch": 1.13, + "learning_rate": 4.377939290294998e-05, + "loss": 0.5696, + "step": 26400 + }, + { + "epoch": 1.13, + "learning_rate": 4.375801624625909e-05, + "loss": 0.5231, + "step": 26410 + }, + { + "epoch": 1.13, + "learning_rate": 4.373663958956819e-05, + "loss": 0.5862, + "step": 26420 + }, + { + "epoch": 1.13, + "learning_rate": 4.37152629328773e-05, + "loss": 0.5265, + "step": 26430 + }, + { + "epoch": 1.13, + "learning_rate": 4.369388627618641e-05, + "loss": 0.5446, + "step": 26440 + }, + { + "epoch": 1.13, + "learning_rate": 4.3672509619495514e-05, + "loss": 0.5674, + "step": 26450 + }, + { + "epoch": 1.13, + "learning_rate": 4.365113296280462e-05, + "loss": 0.5266, + "step": 26460 + }, + { + "epoch": 1.13, + "learning_rate": 4.362975630611373e-05, + "loss": 0.5901, + "step": 26470 + }, + { + "epoch": 1.13, + "learning_rate": 4.3608379649422834e-05, + "loss": 0.5415, + "step": 26480 + }, + { + "epoch": 1.13, + "learning_rate": 4.358700299273194e-05, + "loss": 0.5379, + "step": 26490 + }, + { + "epoch": 1.13, + "learning_rate": 4.356562633604104e-05, + "loss": 0.5656, + "step": 26500 + }, + { + "epoch": 1.13, + "learning_rate": 4.3544249679350154e-05, + "loss": 0.529, + "step": 26510 + }, + { + "epoch": 1.13, + "learning_rate": 4.352287302265926e-05, + "loss": 0.5908, + "step": 26520 + }, + { + "epoch": 1.13, + "learning_rate": 4.350149636596836e-05, + "loss": 0.5325, + "step": 26530 + }, + { + "epoch": 1.13, + "learning_rate": 4.3480119709277474e-05, + "loss": 0.5465, + "step": 26540 + }, + { + "epoch": 1.13, + "learning_rate": 4.345874305258658e-05, + "loss": 0.5705, + "step": 26550 + }, + { + "epoch": 1.13, + "learning_rate": 4.3437366395895684e-05, + "loss": 0.5226, + "step": 26560 + }, + { + "epoch": 1.13, + "learning_rate": 4.3415989739204795e-05, + "loss": 0.5808, + "step": 26570 + }, + { + "epoch": 1.13, + "learning_rate": 4.33946130825139e-05, + "loss": 0.5355, + "step": 26580 + }, + { + "epoch": 1.13, + "learning_rate": 4.3373236425823004e-05, + "loss": 0.5443, + "step": 26590 + }, + { + "epoch": 1.13, + "learning_rate": 4.335185976913211e-05, + "loss": 0.5762, + "step": 26600 + }, + { + "epoch": 1.14, + "learning_rate": 4.333048311244122e-05, + "loss": 0.5319, + "step": 26610 + }, + { + "epoch": 1.14, + "learning_rate": 4.330910645575032e-05, + "loss": 0.5782, + "step": 26620 + }, + { + "epoch": 1.14, + "learning_rate": 4.328772979905943e-05, + "loss": 0.5356, + "step": 26630 + }, + { + "epoch": 1.14, + "learning_rate": 4.326635314236854e-05, + "loss": 0.5447, + "step": 26640 + }, + { + "epoch": 1.14, + "learning_rate": 4.324497648567764e-05, + "loss": 0.5645, + "step": 26650 + }, + { + "epoch": 1.14, + "learning_rate": 4.322359982898675e-05, + "loss": 0.5224, + "step": 26660 + }, + { + "epoch": 1.14, + "learning_rate": 4.3202223172295853e-05, + "loss": 0.5835, + "step": 26670 + }, + { + "epoch": 1.14, + "learning_rate": 4.318084651560496e-05, + "loss": 0.5366, + "step": 26680 + }, + { + "epoch": 1.14, + "learning_rate": 4.315946985891407e-05, + "loss": 0.527, + "step": 26690 + }, + { + "epoch": 1.14, + "learning_rate": 4.3138093202223174e-05, + "loss": 0.5644, + "step": 26700 + }, + { + "epoch": 1.14, + "learning_rate": 4.3116716545532285e-05, + "loss": 0.5258, + "step": 26710 + }, + { + "epoch": 1.14, + "learning_rate": 4.309533988884138e-05, + "loss": 0.5855, + "step": 26720 + }, + { + "epoch": 1.14, + "learning_rate": 4.3073963232150494e-05, + "loss": 0.5336, + "step": 26730 + }, + { + "epoch": 1.14, + "learning_rate": 4.3052586575459605e-05, + "loss": 0.5415, + "step": 26740 + }, + { + "epoch": 1.14, + "learning_rate": 4.30312099187687e-05, + "loss": 0.573, + "step": 26750 + }, + { + "epoch": 1.14, + "learning_rate": 4.3009833262077814e-05, + "loss": 0.5242, + "step": 26760 + }, + { + "epoch": 1.14, + "learning_rate": 4.298845660538692e-05, + "loss": 0.5835, + "step": 26770 + }, + { + "epoch": 1.14, + "learning_rate": 4.2967079948696023e-05, + "loss": 0.5314, + "step": 26780 + }, + { + "epoch": 1.14, + "learning_rate": 4.2945703292005135e-05, + "loss": 0.5343, + "step": 26790 + }, + { + "epoch": 1.14, + "learning_rate": 4.292432663531424e-05, + "loss": 0.57, + "step": 26800 + }, + { + "epoch": 1.14, + "learning_rate": 4.2902949978623344e-05, + "loss": 0.5261, + "step": 26810 + }, + { + "epoch": 1.14, + "learning_rate": 4.288157332193245e-05, + "loss": 0.5876, + "step": 26820 + }, + { + "epoch": 1.14, + "learning_rate": 4.286019666524156e-05, + "loss": 0.5324, + "step": 26830 + }, + { + "epoch": 1.15, + "learning_rate": 4.283882000855067e-05, + "loss": 0.5412, + "step": 26840 + }, + { + "epoch": 1.15, + "learning_rate": 4.281744335185977e-05, + "loss": 0.5632, + "step": 26850 + }, + { + "epoch": 1.15, + "learning_rate": 4.279606669516888e-05, + "loss": 0.5334, + "step": 26860 + }, + { + "epoch": 1.15, + "learning_rate": 4.2774690038477984e-05, + "loss": 0.5869, + "step": 26870 + }, + { + "epoch": 1.15, + "learning_rate": 4.275331338178709e-05, + "loss": 0.5311, + "step": 26880 + }, + { + "epoch": 1.15, + "learning_rate": 4.273193672509619e-05, + "loss": 0.5373, + "step": 26890 + }, + { + "epoch": 1.15, + "learning_rate": 4.2710560068405305e-05, + "loss": 0.5723, + "step": 26900 + }, + { + "epoch": 1.15, + "learning_rate": 4.268918341171441e-05, + "loss": 0.5192, + "step": 26910 + }, + { + "epoch": 1.15, + "learning_rate": 4.2667806755023514e-05, + "loss": 0.5985, + "step": 26920 + }, + { + "epoch": 1.15, + "learning_rate": 4.2646430098332625e-05, + "loss": 0.5409, + "step": 26930 + }, + { + "epoch": 1.15, + "learning_rate": 4.262505344164173e-05, + "loss": 0.549, + "step": 26940 + }, + { + "epoch": 1.15, + "learning_rate": 4.2603676784950834e-05, + "loss": 0.5693, + "step": 26950 + }, + { + "epoch": 1.15, + "learning_rate": 4.2582300128259945e-05, + "loss": 0.522, + "step": 26960 + }, + { + "epoch": 1.15, + "learning_rate": 4.256092347156905e-05, + "loss": 0.5799, + "step": 26970 + }, + { + "epoch": 1.15, + "learning_rate": 4.2539546814878154e-05, + "loss": 0.5247, + "step": 26980 + }, + { + "epoch": 1.15, + "learning_rate": 4.251817015818726e-05, + "loss": 0.5404, + "step": 26990 + }, + { + "epoch": 1.15, + "learning_rate": 4.249679350149637e-05, + "loss": 0.5747, + "step": 27000 + }, + { + "epoch": 1.15, + "learning_rate": 4.2475416844805475e-05, + "loss": 0.5198, + "step": 27010 + }, + { + "epoch": 1.15, + "learning_rate": 4.245404018811458e-05, + "loss": 0.5877, + "step": 27020 + }, + { + "epoch": 1.15, + "learning_rate": 4.243266353142369e-05, + "loss": 0.5352, + "step": 27030 + }, + { + "epoch": 1.15, + "learning_rate": 4.2411286874732795e-05, + "loss": 0.5382, + "step": 27040 + }, + { + "epoch": 1.15, + "learning_rate": 4.23899102180419e-05, + "loss": 0.5681, + "step": 27050 + }, + { + "epoch": 1.15, + "learning_rate": 4.236853356135101e-05, + "loss": 0.5219, + "step": 27060 + }, + { + "epoch": 1.15, + "learning_rate": 4.2347156904660115e-05, + "loss": 0.5936, + "step": 27070 + }, + { + "epoch": 1.16, + "learning_rate": 4.232578024796922e-05, + "loss": 0.5226, + "step": 27080 + }, + { + "epoch": 1.16, + "learning_rate": 4.2304403591278324e-05, + "loss": 0.5422, + "step": 27090 + }, + { + "epoch": 1.16, + "learning_rate": 4.2283026934587436e-05, + "loss": 0.5731, + "step": 27100 + }, + { + "epoch": 1.16, + "learning_rate": 4.226165027789654e-05, + "loss": 0.5172, + "step": 27110 + }, + { + "epoch": 1.16, + "learning_rate": 4.2240273621205645e-05, + "loss": 0.5876, + "step": 27120 + }, + { + "epoch": 1.16, + "learning_rate": 4.2218896964514756e-05, + "loss": 0.5325, + "step": 27130 + }, + { + "epoch": 1.16, + "learning_rate": 4.2197520307823854e-05, + "loss": 0.5501, + "step": 27140 + }, + { + "epoch": 1.16, + "learning_rate": 4.2176143651132965e-05, + "loss": 0.5705, + "step": 27150 + }, + { + "epoch": 1.16, + "learning_rate": 4.215476699444207e-05, + "loss": 0.5238, + "step": 27160 + }, + { + "epoch": 1.16, + "learning_rate": 4.213339033775118e-05, + "loss": 0.586, + "step": 27170 + }, + { + "epoch": 1.16, + "learning_rate": 4.2112013681060285e-05, + "loss": 0.5315, + "step": 27180 + }, + { + "epoch": 1.16, + "learning_rate": 4.209063702436939e-05, + "loss": 0.5471, + "step": 27190 + }, + { + "epoch": 1.16, + "learning_rate": 4.20692603676785e-05, + "loss": 0.5693, + "step": 27200 + }, + { + "epoch": 1.16, + "learning_rate": 4.20478837109876e-05, + "loss": 0.5162, + "step": 27210 + }, + { + "epoch": 1.16, + "learning_rate": 4.202650705429671e-05, + "loss": 0.5898, + "step": 27220 + }, + { + "epoch": 1.16, + "learning_rate": 4.200513039760582e-05, + "loss": 0.5339, + "step": 27230 + }, + { + "epoch": 1.16, + "learning_rate": 4.198375374091492e-05, + "loss": 0.5419, + "step": 27240 + }, + { + "epoch": 1.16, + "learning_rate": 4.196237708422403e-05, + "loss": 0.5705, + "step": 27250 + }, + { + "epoch": 1.16, + "learning_rate": 4.1941000427533135e-05, + "loss": 0.5172, + "step": 27260 + }, + { + "epoch": 1.16, + "learning_rate": 4.191962377084224e-05, + "loss": 0.5822, + "step": 27270 + }, + { + "epoch": 1.16, + "learning_rate": 4.189824711415135e-05, + "loss": 0.5297, + "step": 27280 + }, + { + "epoch": 1.16, + "learning_rate": 4.1876870457460455e-05, + "loss": 0.5421, + "step": 27290 + }, + { + "epoch": 1.16, + "learning_rate": 4.185549380076956e-05, + "loss": 0.5586, + "step": 27300 + }, + { + "epoch": 1.17, + "learning_rate": 4.1834117144078664e-05, + "loss": 0.5268, + "step": 27310 + }, + { + "epoch": 1.17, + "learning_rate": 4.1812740487387775e-05, + "loss": 0.5768, + "step": 27320 + }, + { + "epoch": 1.17, + "learning_rate": 4.179136383069689e-05, + "loss": 0.5315, + "step": 27330 + }, + { + "epoch": 1.17, + "learning_rate": 4.1769987174005984e-05, + "loss": 0.5365, + "step": 27340 + }, + { + "epoch": 1.17, + "learning_rate": 4.1748610517315096e-05, + "loss": 0.5702, + "step": 27350 + }, + { + "epoch": 1.17, + "learning_rate": 4.17272338606242e-05, + "loss": 0.537, + "step": 27360 + }, + { + "epoch": 1.17, + "learning_rate": 4.1705857203933305e-05, + "loss": 0.5854, + "step": 27370 + }, + { + "epoch": 1.17, + "learning_rate": 4.1684480547242416e-05, + "loss": 0.5334, + "step": 27380 + }, + { + "epoch": 1.17, + "learning_rate": 4.166310389055152e-05, + "loss": 0.5387, + "step": 27390 + }, + { + "epoch": 1.17, + "learning_rate": 4.1641727233860625e-05, + "loss": 0.5669, + "step": 27400 + }, + { + "epoch": 1.17, + "learning_rate": 4.162035057716973e-05, + "loss": 0.529, + "step": 27410 + }, + { + "epoch": 1.17, + "learning_rate": 4.159897392047884e-05, + "loss": 0.5782, + "step": 27420 + }, + { + "epoch": 1.17, + "learning_rate": 4.1577597263787945e-05, + "loss": 0.5451, + "step": 27430 + }, + { + "epoch": 1.17, + "learning_rate": 4.155622060709705e-05, + "loss": 0.5383, + "step": 27440 + }, + { + "epoch": 1.17, + "learning_rate": 4.153484395040616e-05, + "loss": 0.565, + "step": 27450 + }, + { + "epoch": 1.17, + "learning_rate": 4.1513467293715266e-05, + "loss": 0.5274, + "step": 27460 + }, + { + "epoch": 1.17, + "learning_rate": 4.149209063702437e-05, + "loss": 0.5822, + "step": 27470 + }, + { + "epoch": 1.17, + "learning_rate": 4.1470713980333475e-05, + "loss": 0.5401, + "step": 27480 + }, + { + "epoch": 1.17, + "learning_rate": 4.1449337323642586e-05, + "loss": 0.5455, + "step": 27490 + }, + { + "epoch": 1.17, + "learning_rate": 4.142796066695169e-05, + "loss": 0.5746, + "step": 27500 + }, + { + "epoch": 1.17, + "learning_rate": 4.1406584010260795e-05, + "loss": 0.5214, + "step": 27510 + }, + { + "epoch": 1.17, + "learning_rate": 4.1385207353569906e-05, + "loss": 0.5706, + "step": 27520 + }, + { + "epoch": 1.17, + "learning_rate": 4.136383069687901e-05, + "loss": 0.5383, + "step": 27530 + }, + { + "epoch": 1.17, + "learning_rate": 4.1342454040188115e-05, + "loss": 0.5477, + "step": 27540 + }, + { + "epoch": 1.18, + "learning_rate": 4.1321077383497227e-05, + "loss": 0.5602, + "step": 27550 + }, + { + "epoch": 1.18, + "learning_rate": 4.129970072680633e-05, + "loss": 0.5158, + "step": 27560 + }, + { + "epoch": 1.18, + "learning_rate": 4.1278324070115436e-05, + "loss": 0.5895, + "step": 27570 + }, + { + "epoch": 1.18, + "learning_rate": 4.125694741342454e-05, + "loss": 0.5286, + "step": 27580 + }, + { + "epoch": 1.18, + "learning_rate": 4.123557075673365e-05, + "loss": 0.5382, + "step": 27590 + }, + { + "epoch": 1.18, + "learning_rate": 4.1214194100042756e-05, + "loss": 0.5638, + "step": 27600 + }, + { + "epoch": 1.18, + "learning_rate": 4.119281744335186e-05, + "loss": 0.5323, + "step": 27610 + }, + { + "epoch": 1.18, + "learning_rate": 4.117144078666097e-05, + "loss": 0.5834, + "step": 27620 + }, + { + "epoch": 1.18, + "learning_rate": 4.1150064129970076e-05, + "loss": 0.5383, + "step": 27630 + }, + { + "epoch": 1.18, + "learning_rate": 4.112868747327918e-05, + "loss": 0.5395, + "step": 27640 + }, + { + "epoch": 1.18, + "learning_rate": 4.110731081658829e-05, + "loss": 0.5681, + "step": 27650 + }, + { + "epoch": 1.18, + "learning_rate": 4.1085934159897397e-05, + "loss": 0.5255, + "step": 27660 + }, + { + "epoch": 1.18, + "learning_rate": 4.10645575032065e-05, + "loss": 0.5883, + "step": 27670 + }, + { + "epoch": 1.18, + "learning_rate": 4.1043180846515606e-05, + "loss": 0.5428, + "step": 27680 + }, + { + "epoch": 1.18, + "learning_rate": 4.102180418982472e-05, + "loss": 0.5416, + "step": 27690 + }, + { + "epoch": 1.18, + "learning_rate": 4.1000427533133815e-05, + "loss": 0.5681, + "step": 27700 + }, + { + "epoch": 1.18, + "learning_rate": 4.0979050876442926e-05, + "loss": 0.5196, + "step": 27710 + }, + { + "epoch": 1.18, + "learning_rate": 4.095767421975204e-05, + "loss": 0.5879, + "step": 27720 + }, + { + "epoch": 1.18, + "learning_rate": 4.0936297563061135e-05, + "loss": 0.5448, + "step": 27730 + }, + { + "epoch": 1.18, + "learning_rate": 4.0914920906370246e-05, + "loss": 0.5473, + "step": 27740 + }, + { + "epoch": 1.18, + "learning_rate": 4.089354424967935e-05, + "loss": 0.5635, + "step": 27750 + }, + { + "epoch": 1.18, + "learning_rate": 4.0872167592988455e-05, + "loss": 0.5224, + "step": 27760 + }, + { + "epoch": 1.18, + "learning_rate": 4.0850790936297566e-05, + "loss": 0.5835, + "step": 27770 + }, + { + "epoch": 1.19, + "learning_rate": 4.082941427960667e-05, + "loss": 0.5303, + "step": 27780 + }, + { + "epoch": 1.19, + "learning_rate": 4.080803762291578e-05, + "loss": 0.5364, + "step": 27790 + }, + { + "epoch": 1.19, + "learning_rate": 4.078666096622488e-05, + "loss": 0.5728, + "step": 27800 + }, + { + "epoch": 1.19, + "learning_rate": 4.076528430953399e-05, + "loss": 0.5251, + "step": 27810 + }, + { + "epoch": 1.19, + "learning_rate": 4.07439076528431e-05, + "loss": 0.5798, + "step": 27820 + }, + { + "epoch": 1.19, + "learning_rate": 4.07225309961522e-05, + "loss": 0.5331, + "step": 27830 + }, + { + "epoch": 1.19, + "learning_rate": 4.070115433946131e-05, + "loss": 0.5344, + "step": 27840 + }, + { + "epoch": 1.19, + "learning_rate": 4.0679777682770416e-05, + "loss": 0.5721, + "step": 27850 + }, + { + "epoch": 1.19, + "learning_rate": 4.065840102607952e-05, + "loss": 0.5264, + "step": 27860 + }, + { + "epoch": 1.19, + "learning_rate": 4.063702436938863e-05, + "loss": 0.5951, + "step": 27870 + }, + { + "epoch": 1.19, + "learning_rate": 4.0615647712697736e-05, + "loss": 0.5346, + "step": 27880 + }, + { + "epoch": 1.19, + "learning_rate": 4.059427105600684e-05, + "loss": 0.5429, + "step": 27890 + }, + { + "epoch": 1.19, + "learning_rate": 4.0572894399315945e-05, + "loss": 0.56, + "step": 27900 + }, + { + "epoch": 1.19, + "learning_rate": 4.055151774262506e-05, + "loss": 0.5207, + "step": 27910 + }, + { + "epoch": 1.19, + "learning_rate": 4.053014108593416e-05, + "loss": 0.5894, + "step": 27920 + }, + { + "epoch": 1.19, + "learning_rate": 4.0508764429243266e-05, + "loss": 0.5352, + "step": 27930 + }, + { + "epoch": 1.19, + "learning_rate": 4.048738777255238e-05, + "loss": 0.541, + "step": 27940 + }, + { + "epoch": 1.19, + "learning_rate": 4.046601111586148e-05, + "loss": 0.5732, + "step": 27950 + }, + { + "epoch": 1.19, + "learning_rate": 4.0444634459170586e-05, + "loss": 0.5221, + "step": 27960 + }, + { + "epoch": 1.19, + "learning_rate": 4.042325780247969e-05, + "loss": 0.5849, + "step": 27970 + }, + { + "epoch": 1.19, + "learning_rate": 4.04018811457888e-05, + "loss": 0.5314, + "step": 27980 + }, + { + "epoch": 1.19, + "learning_rate": 4.0380504489097906e-05, + "loss": 0.5391, + "step": 27990 + }, + { + "epoch": 1.19, + "learning_rate": 4.035912783240701e-05, + "loss": 0.5665, + "step": 28000 + }, + { + "epoch": 1.19, + "learning_rate": 4.033775117571612e-05, + "loss": 0.5222, + "step": 28010 + }, + { + "epoch": 1.2, + "learning_rate": 4.031637451902523e-05, + "loss": 0.5772, + "step": 28020 + }, + { + "epoch": 1.2, + "learning_rate": 4.029499786233433e-05, + "loss": 0.521, + "step": 28030 + }, + { + "epoch": 1.2, + "learning_rate": 4.027362120564344e-05, + "loss": 0.5398, + "step": 28040 + }, + { + "epoch": 1.2, + "learning_rate": 4.025224454895255e-05, + "loss": 0.583, + "step": 28050 + }, + { + "epoch": 1.2, + "learning_rate": 4.023086789226165e-05, + "loss": 0.524, + "step": 28060 + }, + { + "epoch": 1.2, + "learning_rate": 4.0209491235570756e-05, + "loss": 0.5916, + "step": 28070 + }, + { + "epoch": 1.2, + "learning_rate": 4.018811457887987e-05, + "loss": 0.5414, + "step": 28080 + }, + { + "epoch": 1.2, + "learning_rate": 4.016673792218897e-05, + "loss": 0.5315, + "step": 28090 + }, + { + "epoch": 1.2, + "learning_rate": 4.0145361265498076e-05, + "loss": 0.5714, + "step": 28100 + }, + { + "epoch": 1.2, + "learning_rate": 4.012398460880719e-05, + "loss": 0.5265, + "step": 28110 + }, + { + "epoch": 1.2, + "learning_rate": 4.010260795211629e-05, + "loss": 0.5774, + "step": 28120 + }, + { + "epoch": 1.2, + "learning_rate": 4.00812312954254e-05, + "loss": 0.5286, + "step": 28130 + }, + { + "epoch": 1.2, + "learning_rate": 4.005985463873451e-05, + "loss": 0.5389, + "step": 28140 + }, + { + "epoch": 1.2, + "learning_rate": 4.003847798204361e-05, + "loss": 0.5623, + "step": 28150 + }, + { + "epoch": 1.2, + "learning_rate": 4.001710132535272e-05, + "loss": 0.5251, + "step": 28160 + }, + { + "epoch": 1.2, + "learning_rate": 3.999572466866182e-05, + "loss": 0.5805, + "step": 28170 + }, + { + "epoch": 1.2, + "learning_rate": 3.997434801197093e-05, + "loss": 0.5365, + "step": 28180 + }, + { + "epoch": 1.2, + "learning_rate": 3.995297135528004e-05, + "loss": 0.5396, + "step": 28190 + }, + { + "epoch": 1.2, + "learning_rate": 3.993159469858914e-05, + "loss": 0.5647, + "step": 28200 + }, + { + "epoch": 1.2, + "learning_rate": 3.991021804189825e-05, + "loss": 0.5197, + "step": 28210 + }, + { + "epoch": 1.2, + "learning_rate": 3.988884138520735e-05, + "loss": 0.5787, + "step": 28220 + }, + { + "epoch": 1.2, + "learning_rate": 3.986746472851646e-05, + "loss": 0.5316, + "step": 28230 + }, + { + "epoch": 1.2, + "learning_rate": 3.9846088071825567e-05, + "loss": 0.5351, + "step": 28240 + }, + { + "epoch": 1.21, + "learning_rate": 3.982471141513468e-05, + "loss": 0.5673, + "step": 28250 + }, + { + "epoch": 1.21, + "learning_rate": 3.980333475844378e-05, + "loss": 0.5287, + "step": 28260 + }, + { + "epoch": 1.21, + "learning_rate": 3.978195810175289e-05, + "loss": 0.5749, + "step": 28270 + }, + { + "epoch": 1.21, + "learning_rate": 3.9760581445062e-05, + "loss": 0.5258, + "step": 28280 + }, + { + "epoch": 1.21, + "learning_rate": 3.9739204788371096e-05, + "loss": 0.5367, + "step": 28290 + }, + { + "epoch": 1.21, + "learning_rate": 3.971782813168021e-05, + "loss": 0.5621, + "step": 28300 + }, + { + "epoch": 1.21, + "learning_rate": 3.969645147498932e-05, + "loss": 0.5232, + "step": 28310 + }, + { + "epoch": 1.21, + "learning_rate": 3.9675074818298416e-05, + "loss": 0.5871, + "step": 28320 + }, + { + "epoch": 1.21, + "learning_rate": 3.965369816160753e-05, + "loss": 0.5368, + "step": 28330 + }, + { + "epoch": 1.21, + "learning_rate": 3.963232150491663e-05, + "loss": 0.5434, + "step": 28340 + }, + { + "epoch": 1.21, + "learning_rate": 3.9610944848225737e-05, + "loss": 0.5687, + "step": 28350 + }, + { + "epoch": 1.21, + "learning_rate": 3.958956819153485e-05, + "loss": 0.5161, + "step": 28360 + }, + { + "epoch": 1.21, + "learning_rate": 3.956819153484395e-05, + "loss": 0.5788, + "step": 28370 + }, + { + "epoch": 1.21, + "learning_rate": 3.954681487815306e-05, + "loss": 0.5281, + "step": 28380 + }, + { + "epoch": 1.21, + "learning_rate": 3.952543822146216e-05, + "loss": 0.5363, + "step": 28390 + }, + { + "epoch": 1.21, + "learning_rate": 3.950406156477127e-05, + "loss": 0.5578, + "step": 28400 + }, + { + "epoch": 1.21, + "learning_rate": 3.9482684908080384e-05, + "loss": 0.5191, + "step": 28410 + }, + { + "epoch": 1.21, + "learning_rate": 3.946130825138948e-05, + "loss": 0.5766, + "step": 28420 + }, + { + "epoch": 1.21, + "learning_rate": 3.943993159469859e-05, + "loss": 0.5241, + "step": 28430 + }, + { + "epoch": 1.21, + "learning_rate": 3.94185549380077e-05, + "loss": 0.5439, + "step": 28440 + }, + { + "epoch": 1.21, + "learning_rate": 3.93971782813168e-05, + "loss": 0.5761, + "step": 28450 + }, + { + "epoch": 1.21, + "learning_rate": 3.937580162462591e-05, + "loss": 0.52, + "step": 28460 + }, + { + "epoch": 1.21, + "learning_rate": 3.935442496793502e-05, + "loss": 0.5846, + "step": 28470 + }, + { + "epoch": 1.22, + "learning_rate": 3.933304831124412e-05, + "loss": 0.5384, + "step": 28480 + }, + { + "epoch": 1.22, + "learning_rate": 3.931167165455323e-05, + "loss": 0.54, + "step": 28490 + }, + { + "epoch": 1.22, + "learning_rate": 3.929029499786234e-05, + "loss": 0.5724, + "step": 28500 + }, + { + "epoch": 1.22, + "learning_rate": 3.926891834117144e-05, + "loss": 0.5318, + "step": 28510 + }, + { + "epoch": 1.22, + "learning_rate": 3.924754168448055e-05, + "loss": 0.5892, + "step": 28520 + }, + { + "epoch": 1.22, + "learning_rate": 3.922616502778966e-05, + "loss": 0.5398, + "step": 28530 + }, + { + "epoch": 1.22, + "learning_rate": 3.920478837109876e-05, + "loss": 0.5405, + "step": 28540 + }, + { + "epoch": 1.22, + "learning_rate": 3.918341171440787e-05, + "loss": 0.5772, + "step": 28550 + }, + { + "epoch": 1.22, + "learning_rate": 3.916203505771697e-05, + "loss": 0.5195, + "step": 28560 + }, + { + "epoch": 1.22, + "learning_rate": 3.914065840102608e-05, + "loss": 0.5777, + "step": 28570 + }, + { + "epoch": 1.22, + "learning_rate": 3.911928174433519e-05, + "loss": 0.5357, + "step": 28580 + }, + { + "epoch": 1.22, + "learning_rate": 3.909790508764429e-05, + "loss": 0.5342, + "step": 28590 + }, + { + "epoch": 1.22, + "learning_rate": 3.9076528430953404e-05, + "loss": 0.5688, + "step": 28600 + }, + { + "epoch": 1.22, + "learning_rate": 3.905515177426251e-05, + "loss": 0.517, + "step": 28610 + }, + { + "epoch": 1.22, + "learning_rate": 3.903377511757161e-05, + "loss": 0.5809, + "step": 28620 + }, + { + "epoch": 1.22, + "learning_rate": 3.9012398460880724e-05, + "loss": 0.5345, + "step": 28630 + }, + { + "epoch": 1.22, + "learning_rate": 3.899102180418983e-05, + "loss": 0.536, + "step": 28640 + }, + { + "epoch": 1.22, + "learning_rate": 3.896964514749893e-05, + "loss": 0.5751, + "step": 28650 + }, + { + "epoch": 1.22, + "learning_rate": 3.894826849080804e-05, + "loss": 0.5207, + "step": 28660 + }, + { + "epoch": 1.22, + "learning_rate": 3.892689183411715e-05, + "loss": 0.5897, + "step": 28670 + }, + { + "epoch": 1.22, + "learning_rate": 3.890551517742625e-05, + "loss": 0.5367, + "step": 28680 + }, + { + "epoch": 1.22, + "learning_rate": 3.888413852073536e-05, + "loss": 0.5404, + "step": 28690 + }, + { + "epoch": 1.22, + "learning_rate": 3.886276186404447e-05, + "loss": 0.5679, + "step": 28700 + }, + { + "epoch": 1.22, + "learning_rate": 3.884138520735357e-05, + "loss": 0.515, + "step": 28710 + }, + { + "epoch": 1.23, + "learning_rate": 3.882000855066268e-05, + "loss": 0.5826, + "step": 28720 + }, + { + "epoch": 1.23, + "learning_rate": 3.879863189397179e-05, + "loss": 0.5246, + "step": 28730 + }, + { + "epoch": 1.23, + "learning_rate": 3.8777255237280894e-05, + "loss": 0.5357, + "step": 28740 + }, + { + "epoch": 1.23, + "learning_rate": 3.875587858059e-05, + "loss": 0.5704, + "step": 28750 + }, + { + "epoch": 1.23, + "learning_rate": 3.87345019238991e-05, + "loss": 0.5219, + "step": 28760 + }, + { + "epoch": 1.23, + "learning_rate": 3.8713125267208214e-05, + "loss": 0.5877, + "step": 28770 + }, + { + "epoch": 1.23, + "learning_rate": 3.869174861051731e-05, + "loss": 0.5208, + "step": 28780 + }, + { + "epoch": 1.23, + "learning_rate": 3.867037195382642e-05, + "loss": 0.545, + "step": 28790 + }, + { + "epoch": 1.23, + "learning_rate": 3.8648995297135534e-05, + "loss": 0.5647, + "step": 28800 + }, + { + "epoch": 1.23, + "learning_rate": 3.862761864044463e-05, + "loss": 0.5228, + "step": 28810 + }, + { + "epoch": 1.23, + "learning_rate": 3.8606241983753743e-05, + "loss": 0.5823, + "step": 28820 + }, + { + "epoch": 1.23, + "learning_rate": 3.858486532706285e-05, + "loss": 0.5309, + "step": 28830 + }, + { + "epoch": 1.23, + "learning_rate": 3.856348867037195e-05, + "loss": 0.5351, + "step": 28840 + }, + { + "epoch": 1.23, + "learning_rate": 3.8542112013681064e-05, + "loss": 0.5734, + "step": 28850 + }, + { + "epoch": 1.23, + "learning_rate": 3.852073535699017e-05, + "loss": 0.5169, + "step": 28860 + }, + { + "epoch": 1.23, + "learning_rate": 3.849935870029928e-05, + "loss": 0.5788, + "step": 28870 + }, + { + "epoch": 1.23, + "learning_rate": 3.847798204360838e-05, + "loss": 0.5346, + "step": 28880 + }, + { + "epoch": 1.23, + "learning_rate": 3.845660538691749e-05, + "loss": 0.545, + "step": 28890 + }, + { + "epoch": 1.23, + "learning_rate": 3.84352287302266e-05, + "loss": 0.5654, + "step": 28900 + }, + { + "epoch": 1.23, + "learning_rate": 3.84138520735357e-05, + "loss": 0.523, + "step": 28910 + }, + { + "epoch": 1.23, + "learning_rate": 3.839247541684481e-05, + "loss": 0.5912, + "step": 28920 + }, + { + "epoch": 1.23, + "learning_rate": 3.837109876015391e-05, + "loss": 0.5321, + "step": 28930 + }, + { + "epoch": 1.23, + "learning_rate": 3.834972210346302e-05, + "loss": 0.5423, + "step": 28940 + }, + { + "epoch": 1.24, + "learning_rate": 3.832834544677213e-05, + "loss": 0.562, + "step": 28950 + }, + { + "epoch": 1.24, + "learning_rate": 3.8306968790081234e-05, + "loss": 0.5268, + "step": 28960 + }, + { + "epoch": 1.24, + "learning_rate": 3.828559213339034e-05, + "loss": 0.5792, + "step": 28970 + }, + { + "epoch": 1.24, + "learning_rate": 3.826421547669944e-05, + "loss": 0.5309, + "step": 28980 + }, + { + "epoch": 1.24, + "learning_rate": 3.8242838820008554e-05, + "loss": 0.5498, + "step": 28990 + }, + { + "epoch": 1.24, + "learning_rate": 3.822146216331766e-05, + "loss": 0.5709, + "step": 29000 + }, + { + "epoch": 1.24, + "learning_rate": 3.820008550662676e-05, + "loss": 0.5182, + "step": 29010 + }, + { + "epoch": 1.24, + "learning_rate": 3.8178708849935874e-05, + "loss": 0.5773, + "step": 29020 + }, + { + "epoch": 1.24, + "learning_rate": 3.815733219324498e-05, + "loss": 0.5353, + "step": 29030 + }, + { + "epoch": 1.24, + "learning_rate": 3.813595553655408e-05, + "loss": 0.5403, + "step": 29040 + }, + { + "epoch": 1.24, + "learning_rate": 3.811457887986319e-05, + "loss": 0.5621, + "step": 29050 + }, + { + "epoch": 1.24, + "learning_rate": 3.80932022231723e-05, + "loss": 0.5292, + "step": 29060 + }, + { + "epoch": 1.24, + "learning_rate": 3.8071825566481404e-05, + "loss": 0.5733, + "step": 29070 + }, + { + "epoch": 1.24, + "learning_rate": 3.805044890979051e-05, + "loss": 0.5387, + "step": 29080 + }, + { + "epoch": 1.24, + "learning_rate": 3.802907225309962e-05, + "loss": 0.5361, + "step": 29090 + }, + { + "epoch": 1.24, + "learning_rate": 3.8007695596408724e-05, + "loss": 0.5673, + "step": 29100 + }, + { + "epoch": 1.24, + "learning_rate": 3.798631893971783e-05, + "loss": 0.5223, + "step": 29110 + }, + { + "epoch": 1.24, + "learning_rate": 3.796494228302694e-05, + "loss": 0.5827, + "step": 29120 + }, + { + "epoch": 1.24, + "learning_rate": 3.7943565626336044e-05, + "loss": 0.536, + "step": 29130 + }, + { + "epoch": 1.24, + "learning_rate": 3.792218896964515e-05, + "loss": 0.5403, + "step": 29140 + }, + { + "epoch": 1.24, + "learning_rate": 3.790081231295425e-05, + "loss": 0.5643, + "step": 29150 + }, + { + "epoch": 1.24, + "learning_rate": 3.7879435656263365e-05, + "loss": 0.5214, + "step": 29160 + }, + { + "epoch": 1.24, + "learning_rate": 3.785805899957247e-05, + "loss": 0.5833, + "step": 29170 + }, + { + "epoch": 1.24, + "learning_rate": 3.7836682342881574e-05, + "loss": 0.5306, + "step": 29180 + }, + { + "epoch": 1.25, + "learning_rate": 3.7815305686190685e-05, + "loss": 0.5416, + "step": 29190 + }, + { + "epoch": 1.25, + "learning_rate": 3.779392902949979e-05, + "loss": 0.5692, + "step": 29200 + }, + { + "epoch": 1.25, + "learning_rate": 3.7772552372808894e-05, + "loss": 0.5235, + "step": 29210 + }, + { + "epoch": 1.25, + "learning_rate": 3.7751175716118005e-05, + "loss": 0.5803, + "step": 29220 + }, + { + "epoch": 1.25, + "learning_rate": 3.772979905942711e-05, + "loss": 0.5411, + "step": 29230 + }, + { + "epoch": 1.25, + "learning_rate": 3.7708422402736214e-05, + "loss": 0.5404, + "step": 29240 + }, + { + "epoch": 1.25, + "learning_rate": 3.768704574604532e-05, + "loss": 0.5681, + "step": 29250 + }, + { + "epoch": 1.25, + "learning_rate": 3.766566908935443e-05, + "loss": 0.5234, + "step": 29260 + }, + { + "epoch": 1.25, + "learning_rate": 3.7644292432663534e-05, + "loss": 0.586, + "step": 29270 + }, + { + "epoch": 1.25, + "learning_rate": 3.762291577597264e-05, + "loss": 0.5316, + "step": 29280 + }, + { + "epoch": 1.25, + "learning_rate": 3.760153911928175e-05, + "loss": 0.537, + "step": 29290 + }, + { + "epoch": 1.25, + "learning_rate": 3.758016246259085e-05, + "loss": 0.5613, + "step": 29300 + }, + { + "epoch": 1.25, + "learning_rate": 3.755878580589996e-05, + "loss": 0.522, + "step": 29310 + }, + { + "epoch": 1.25, + "learning_rate": 3.7537409149209064e-05, + "loss": 0.578, + "step": 29320 + }, + { + "epoch": 1.25, + "learning_rate": 3.751603249251817e-05, + "loss": 0.5344, + "step": 29330 + }, + { + "epoch": 1.25, + "learning_rate": 3.749465583582728e-05, + "loss": 0.5324, + "step": 29340 + }, + { + "epoch": 1.25, + "learning_rate": 3.7473279179136384e-05, + "loss": 0.5662, + "step": 29350 + }, + { + "epoch": 1.25, + "learning_rate": 3.7451902522445495e-05, + "loss": 0.5243, + "step": 29360 + }, + { + "epoch": 1.25, + "learning_rate": 3.743052586575459e-05, + "loss": 0.5759, + "step": 29370 + }, + { + "epoch": 1.25, + "learning_rate": 3.7409149209063704e-05, + "loss": 0.5256, + "step": 29380 + }, + { + "epoch": 1.25, + "learning_rate": 3.7387772552372816e-05, + "loss": 0.5412, + "step": 29390 + }, + { + "epoch": 1.25, + "learning_rate": 3.7366395895681913e-05, + "loss": 0.5732, + "step": 29400 + }, + { + "epoch": 1.25, + "learning_rate": 3.7345019238991025e-05, + "loss": 0.5222, + "step": 29410 + }, + { + "epoch": 1.26, + "learning_rate": 3.732364258230013e-05, + "loss": 0.5817, + "step": 29420 + }, + { + "epoch": 1.26, + "learning_rate": 3.7302265925609234e-05, + "loss": 0.5352, + "step": 29430 + }, + { + "epoch": 1.26, + "learning_rate": 3.7280889268918345e-05, + "loss": 0.5358, + "step": 29440 + }, + { + "epoch": 1.26, + "learning_rate": 3.725951261222745e-05, + "loss": 0.5681, + "step": 29450 + }, + { + "epoch": 1.26, + "learning_rate": 3.7238135955536554e-05, + "loss": 0.5245, + "step": 29460 + }, + { + "epoch": 1.26, + "learning_rate": 3.721675929884566e-05, + "loss": 0.579, + "step": 29470 + }, + { + "epoch": 1.26, + "learning_rate": 3.719538264215477e-05, + "loss": 0.5244, + "step": 29480 + }, + { + "epoch": 1.26, + "learning_rate": 3.717400598546388e-05, + "loss": 0.5407, + "step": 29490 + }, + { + "epoch": 1.26, + "learning_rate": 3.715262932877298e-05, + "loss": 0.5686, + "step": 29500 + }, + { + "epoch": 1.26, + "learning_rate": 3.713125267208209e-05, + "loss": 0.5219, + "step": 29510 + }, + { + "epoch": 1.26, + "learning_rate": 3.7109876015391195e-05, + "loss": 0.5819, + "step": 29520 + }, + { + "epoch": 1.26, + "learning_rate": 3.70884993587003e-05, + "loss": 0.5306, + "step": 29530 + }, + { + "epoch": 1.26, + "learning_rate": 3.7067122702009404e-05, + "loss": 0.5316, + "step": 29540 + }, + { + "epoch": 1.26, + "learning_rate": 3.7045746045318515e-05, + "loss": 0.5641, + "step": 29550 + }, + { + "epoch": 1.26, + "learning_rate": 3.702436938862762e-05, + "loss": 0.5161, + "step": 29560 + }, + { + "epoch": 1.26, + "learning_rate": 3.7002992731936724e-05, + "loss": 0.5775, + "step": 29570 + }, + { + "epoch": 1.26, + "learning_rate": 3.6981616075245835e-05, + "loss": 0.5274, + "step": 29580 + }, + { + "epoch": 1.26, + "learning_rate": 3.696023941855494e-05, + "loss": 0.5353, + "step": 29590 + }, + { + "epoch": 1.26, + "learning_rate": 3.6938862761864044e-05, + "loss": 0.566, + "step": 29600 + }, + { + "epoch": 1.26, + "learning_rate": 3.6917486105173156e-05, + "loss": 0.5192, + "step": 29610 + }, + { + "epoch": 1.26, + "learning_rate": 3.689610944848226e-05, + "loss": 0.5761, + "step": 29620 + }, + { + "epoch": 1.26, + "learning_rate": 3.6874732791791365e-05, + "loss": 0.5263, + "step": 29630 + }, + { + "epoch": 1.26, + "learning_rate": 3.685335613510047e-05, + "loss": 0.542, + "step": 29640 + }, + { + "epoch": 1.26, + "learning_rate": 3.683197947840958e-05, + "loss": 0.5681, + "step": 29650 + }, + { + "epoch": 1.27, + "learning_rate": 3.6810602821718685e-05, + "loss": 0.5194, + "step": 29660 + }, + { + "epoch": 1.27, + "learning_rate": 3.678922616502779e-05, + "loss": 0.5802, + "step": 29670 + }, + { + "epoch": 1.27, + "learning_rate": 3.67678495083369e-05, + "loss": 0.5291, + "step": 29680 + }, + { + "epoch": 1.27, + "learning_rate": 3.6746472851646005e-05, + "loss": 0.5354, + "step": 29690 + }, + { + "epoch": 1.27, + "learning_rate": 3.672509619495511e-05, + "loss": 0.5848, + "step": 29700 + }, + { + "epoch": 1.27, + "learning_rate": 3.670371953826422e-05, + "loss": 0.5268, + "step": 29710 + }, + { + "epoch": 1.27, + "learning_rate": 3.6682342881573326e-05, + "loss": 0.5855, + "step": 29720 + }, + { + "epoch": 1.27, + "learning_rate": 3.666096622488243e-05, + "loss": 0.5323, + "step": 29730 + }, + { + "epoch": 1.27, + "learning_rate": 3.6639589568191535e-05, + "loss": 0.5465, + "step": 29740 + }, + { + "epoch": 1.27, + "learning_rate": 3.6618212911500646e-05, + "loss": 0.5684, + "step": 29750 + }, + { + "epoch": 1.27, + "learning_rate": 3.659683625480975e-05, + "loss": 0.5217, + "step": 29760 + }, + { + "epoch": 1.27, + "learning_rate": 3.6575459598118855e-05, + "loss": 0.5807, + "step": 29770 + }, + { + "epoch": 1.27, + "learning_rate": 3.6554082941427966e-05, + "loss": 0.5363, + "step": 29780 + }, + { + "epoch": 1.27, + "learning_rate": 3.6532706284737064e-05, + "loss": 0.5413, + "step": 29790 + }, + { + "epoch": 1.27, + "learning_rate": 3.6511329628046175e-05, + "loss": 0.5759, + "step": 29800 + }, + { + "epoch": 1.27, + "learning_rate": 3.648995297135528e-05, + "loss": 0.5165, + "step": 29810 + }, + { + "epoch": 1.27, + "learning_rate": 3.646857631466439e-05, + "loss": 0.5839, + "step": 29820 + }, + { + "epoch": 1.27, + "learning_rate": 3.6447199657973496e-05, + "loss": 0.5301, + "step": 29830 + }, + { + "epoch": 1.27, + "learning_rate": 3.64258230012826e-05, + "loss": 0.535, + "step": 29840 + }, + { + "epoch": 1.27, + "learning_rate": 3.640444634459171e-05, + "loss": 0.5741, + "step": 29850 + }, + { + "epoch": 1.27, + "learning_rate": 3.638306968790081e-05, + "loss": 0.517, + "step": 29860 + }, + { + "epoch": 1.27, + "learning_rate": 3.636169303120992e-05, + "loss": 0.5797, + "step": 29870 + }, + { + "epoch": 1.27, + "learning_rate": 3.634031637451903e-05, + "loss": 0.533, + "step": 29880 + }, + { + "epoch": 1.28, + "learning_rate": 3.631893971782813e-05, + "loss": 0.5356, + "step": 29890 + }, + { + "epoch": 1.28, + "learning_rate": 3.629756306113724e-05, + "loss": 0.5618, + "step": 29900 + }, + { + "epoch": 1.28, + "learning_rate": 3.6276186404446345e-05, + "loss": 0.5155, + "step": 29910 + }, + { + "epoch": 1.28, + "learning_rate": 3.625480974775545e-05, + "loss": 0.576, + "step": 29920 + }, + { + "epoch": 1.28, + "learning_rate": 3.623343309106456e-05, + "loss": 0.5279, + "step": 29930 + }, + { + "epoch": 1.28, + "learning_rate": 3.6212056434373665e-05, + "loss": 0.5424, + "step": 29940 + }, + { + "epoch": 1.28, + "learning_rate": 3.619067977768277e-05, + "loss": 0.5668, + "step": 29950 + }, + { + "epoch": 1.28, + "learning_rate": 3.6169303120991874e-05, + "loss": 0.5161, + "step": 29960 + }, + { + "epoch": 1.28, + "learning_rate": 3.6147926464300986e-05, + "loss": 0.5808, + "step": 29970 + }, + { + "epoch": 1.28, + "learning_rate": 3.61265498076101e-05, + "loss": 0.5319, + "step": 29980 + }, + { + "epoch": 1.28, + "learning_rate": 3.6105173150919195e-05, + "loss": 0.5384, + "step": 29990 + }, + { + "epoch": 1.28, + "learning_rate": 3.6083796494228306e-05, + "loss": 0.5704, + "step": 30000 + }, + { + "epoch": 1.28, + "learning_rate": 3.606241983753741e-05, + "loss": 0.5184, + "step": 30010 + }, + { + "epoch": 1.28, + "learning_rate": 3.6041043180846515e-05, + "loss": 0.5757, + "step": 30020 + }, + { + "epoch": 1.28, + "learning_rate": 3.6019666524155626e-05, + "loss": 0.5345, + "step": 30030 + }, + { + "epoch": 1.28, + "learning_rate": 3.599828986746473e-05, + "loss": 0.5386, + "step": 30040 + }, + { + "epoch": 1.28, + "learning_rate": 3.5976913210773835e-05, + "loss": 0.5742, + "step": 30050 + }, + { + "epoch": 1.28, + "learning_rate": 3.595553655408294e-05, + "loss": 0.5198, + "step": 30060 + }, + { + "epoch": 1.28, + "learning_rate": 3.593415989739205e-05, + "loss": 0.594, + "step": 30070 + }, + { + "epoch": 1.28, + "learning_rate": 3.5912783240701156e-05, + "loss": 0.5332, + "step": 30080 + }, + { + "epoch": 1.28, + "learning_rate": 3.589140658401026e-05, + "loss": 0.5505, + "step": 30090 + }, + { + "epoch": 1.28, + "learning_rate": 3.587002992731937e-05, + "loss": 0.564, + "step": 30100 + }, + { + "epoch": 1.28, + "learning_rate": 3.5848653270628476e-05, + "loss": 0.5089, + "step": 30110 + }, + { + "epoch": 1.28, + "learning_rate": 3.582727661393758e-05, + "loss": 0.574, + "step": 30120 + }, + { + "epoch": 1.29, + "learning_rate": 3.5805899957246685e-05, + "loss": 0.53, + "step": 30130 + }, + { + "epoch": 1.29, + "learning_rate": 3.5784523300555796e-05, + "loss": 0.542, + "step": 30140 + }, + { + "epoch": 1.29, + "learning_rate": 3.57631466438649e-05, + "loss": 0.5549, + "step": 30150 + }, + { + "epoch": 1.29, + "learning_rate": 3.5741769987174005e-05, + "loss": 0.5212, + "step": 30160 + }, + { + "epoch": 1.29, + "learning_rate": 3.572039333048312e-05, + "loss": 0.5834, + "step": 30170 + }, + { + "epoch": 1.29, + "learning_rate": 3.569901667379222e-05, + "loss": 0.5249, + "step": 30180 + }, + { + "epoch": 1.29, + "learning_rate": 3.5677640017101326e-05, + "loss": 0.5381, + "step": 30190 + }, + { + "epoch": 1.29, + "learning_rate": 3.565626336041044e-05, + "loss": 0.5712, + "step": 30200 + }, + { + "epoch": 1.29, + "learning_rate": 3.563488670371954e-05, + "loss": 0.5291, + "step": 30210 + }, + { + "epoch": 1.29, + "learning_rate": 3.5613510047028646e-05, + "loss": 0.5785, + "step": 30220 + }, + { + "epoch": 1.29, + "learning_rate": 3.559213339033775e-05, + "loss": 0.5319, + "step": 30230 + }, + { + "epoch": 1.29, + "learning_rate": 3.557075673364686e-05, + "loss": 0.5481, + "step": 30240 + }, + { + "epoch": 1.29, + "learning_rate": 3.5549380076955966e-05, + "loss": 0.5599, + "step": 30250 + }, + { + "epoch": 1.29, + "learning_rate": 3.552800342026507e-05, + "loss": 0.5166, + "step": 30260 + }, + { + "epoch": 1.29, + "learning_rate": 3.550662676357418e-05, + "loss": 0.5815, + "step": 30270 + }, + { + "epoch": 1.29, + "learning_rate": 3.5485250106883287e-05, + "loss": 0.5281, + "step": 30280 + }, + { + "epoch": 1.29, + "learning_rate": 3.546387345019239e-05, + "loss": 0.5324, + "step": 30290 + }, + { + "epoch": 1.29, + "learning_rate": 3.54424967935015e-05, + "loss": 0.5662, + "step": 30300 + }, + { + "epoch": 1.29, + "learning_rate": 3.542112013681061e-05, + "loss": 0.5237, + "step": 30310 + }, + { + "epoch": 1.29, + "learning_rate": 3.539974348011971e-05, + "loss": 0.5843, + "step": 30320 + }, + { + "epoch": 1.29, + "learning_rate": 3.5378366823428816e-05, + "loss": 0.5284, + "step": 30330 + }, + { + "epoch": 1.29, + "learning_rate": 3.535699016673793e-05, + "loss": 0.5364, + "step": 30340 + }, + { + "epoch": 1.29, + "learning_rate": 3.5335613510047025e-05, + "loss": 0.5696, + "step": 30350 + }, + { + "epoch": 1.3, + "learning_rate": 3.5314236853356136e-05, + "loss": 0.5198, + "step": 30360 + }, + { + "epoch": 1.3, + "learning_rate": 3.529286019666525e-05, + "loss": 0.5781, + "step": 30370 + }, + { + "epoch": 1.3, + "learning_rate": 3.5271483539974345e-05, + "loss": 0.5357, + "step": 30380 + }, + { + "epoch": 1.3, + "learning_rate": 3.5250106883283457e-05, + "loss": 0.5332, + "step": 30390 + }, + { + "epoch": 1.3, + "learning_rate": 3.522873022659256e-05, + "loss": 0.5574, + "step": 30400 + }, + { + "epoch": 1.3, + "learning_rate": 3.5207353569901666e-05, + "loss": 0.522, + "step": 30410 + }, + { + "epoch": 1.3, + "learning_rate": 3.518597691321078e-05, + "loss": 0.5884, + "step": 30420 + }, + { + "epoch": 1.3, + "learning_rate": 3.516460025651988e-05, + "loss": 0.5352, + "step": 30430 + }, + { + "epoch": 1.3, + "learning_rate": 3.514322359982899e-05, + "loss": 0.5359, + "step": 30440 + }, + { + "epoch": 1.3, + "learning_rate": 3.512184694313809e-05, + "loss": 0.5688, + "step": 30450 + }, + { + "epoch": 1.3, + "learning_rate": 3.51004702864472e-05, + "loss": 0.5187, + "step": 30460 + }, + { + "epoch": 1.3, + "learning_rate": 3.507909362975631e-05, + "loss": 0.5805, + "step": 30470 + }, + { + "epoch": 1.3, + "learning_rate": 3.505771697306541e-05, + "loss": 0.5217, + "step": 30480 + }, + { + "epoch": 1.3, + "learning_rate": 3.503634031637452e-05, + "loss": 0.5394, + "step": 30490 + }, + { + "epoch": 1.3, + "learning_rate": 3.5014963659683626e-05, + "loss": 0.5767, + "step": 30500 + }, + { + "epoch": 1.3, + "learning_rate": 3.499358700299273e-05, + "loss": 0.525, + "step": 30510 + }, + { + "epoch": 1.3, + "learning_rate": 3.497221034630184e-05, + "loss": 0.5718, + "step": 30520 + }, + { + "epoch": 1.3, + "learning_rate": 3.495083368961095e-05, + "loss": 0.5236, + "step": 30530 + }, + { + "epoch": 1.3, + "learning_rate": 3.492945703292005e-05, + "loss": 0.5318, + "step": 30540 + }, + { + "epoch": 1.3, + "learning_rate": 3.4908080376229156e-05, + "loss": 0.5633, + "step": 30550 + }, + { + "epoch": 1.3, + "learning_rate": 3.488670371953827e-05, + "loss": 0.5176, + "step": 30560 + }, + { + "epoch": 1.3, + "learning_rate": 3.486532706284737e-05, + "loss": 0.5812, + "step": 30570 + }, + { + "epoch": 1.3, + "learning_rate": 3.4843950406156476e-05, + "loss": 0.529, + "step": 30580 + }, + { + "epoch": 1.31, + "learning_rate": 3.482257374946559e-05, + "loss": 0.5428, + "step": 30590 + }, + { + "epoch": 1.31, + "learning_rate": 3.480119709277469e-05, + "loss": 0.5757, + "step": 30600 + }, + { + "epoch": 1.31, + "learning_rate": 3.4779820436083796e-05, + "loss": 0.5186, + "step": 30610 + }, + { + "epoch": 1.31, + "learning_rate": 3.47584437793929e-05, + "loss": 0.5784, + "step": 30620 + }, + { + "epoch": 1.31, + "learning_rate": 3.473706712270201e-05, + "loss": 0.534, + "step": 30630 + }, + { + "epoch": 1.31, + "learning_rate": 3.471569046601112e-05, + "loss": 0.5342, + "step": 30640 + }, + { + "epoch": 1.31, + "learning_rate": 3.469431380932022e-05, + "loss": 0.5708, + "step": 30650 + }, + { + "epoch": 1.31, + "learning_rate": 3.467293715262933e-05, + "loss": 0.5124, + "step": 30660 + }, + { + "epoch": 1.31, + "learning_rate": 3.465156049593844e-05, + "loss": 0.5683, + "step": 30670 + }, + { + "epoch": 1.31, + "learning_rate": 3.463018383924754e-05, + "loss": 0.5317, + "step": 30680 + }, + { + "epoch": 1.31, + "learning_rate": 3.460880718255665e-05, + "loss": 0.534, + "step": 30690 + }, + { + "epoch": 1.31, + "learning_rate": 3.458743052586576e-05, + "loss": 0.5692, + "step": 30700 + }, + { + "epoch": 1.31, + "learning_rate": 3.456605386917486e-05, + "loss": 0.5226, + "step": 30710 + }, + { + "epoch": 1.31, + "learning_rate": 3.4544677212483966e-05, + "loss": 0.5819, + "step": 30720 + }, + { + "epoch": 1.31, + "learning_rate": 3.452330055579308e-05, + "loss": 0.5248, + "step": 30730 + }, + { + "epoch": 1.31, + "learning_rate": 3.450192389910218e-05, + "loss": 0.5306, + "step": 30740 + }, + { + "epoch": 1.31, + "learning_rate": 3.448054724241129e-05, + "loss": 0.5615, + "step": 30750 + }, + { + "epoch": 1.31, + "learning_rate": 3.44591705857204e-05, + "loss": 0.5122, + "step": 30760 + }, + { + "epoch": 1.31, + "learning_rate": 3.44377939290295e-05, + "loss": 0.5765, + "step": 30770 + }, + { + "epoch": 1.31, + "learning_rate": 3.441641727233861e-05, + "loss": 0.531, + "step": 30780 + }, + { + "epoch": 1.31, + "learning_rate": 3.439504061564772e-05, + "loss": 0.5342, + "step": 30790 + }, + { + "epoch": 1.31, + "learning_rate": 3.437366395895682e-05, + "loss": 0.557, + "step": 30800 + }, + { + "epoch": 1.31, + "learning_rate": 3.435228730226593e-05, + "loss": 0.5038, + "step": 30810 + }, + { + "epoch": 1.31, + "learning_rate": 3.433091064557503e-05, + "loss": 0.5785, + "step": 30820 + }, + { + "epoch": 1.32, + "learning_rate": 3.430953398888414e-05, + "loss": 0.5338, + "step": 30830 + }, + { + "epoch": 1.32, + "learning_rate": 3.428815733219325e-05, + "loss": 0.5405, + "step": 30840 + }, + { + "epoch": 1.32, + "learning_rate": 3.426678067550235e-05, + "loss": 0.5637, + "step": 30850 + }, + { + "epoch": 1.32, + "learning_rate": 3.4245404018811463e-05, + "loss": 0.5148, + "step": 30860 + }, + { + "epoch": 1.32, + "learning_rate": 3.422402736212056e-05, + "loss": 0.5776, + "step": 30870 + }, + { + "epoch": 1.32, + "learning_rate": 3.420265070542967e-05, + "loss": 0.5283, + "step": 30880 + }, + { + "epoch": 1.32, + "learning_rate": 3.418127404873878e-05, + "loss": 0.5327, + "step": 30890 + }, + { + "epoch": 1.32, + "learning_rate": 3.415989739204789e-05, + "loss": 0.5741, + "step": 30900 + }, + { + "epoch": 1.32, + "learning_rate": 3.413852073535699e-05, + "loss": 0.5134, + "step": 30910 + }, + { + "epoch": 1.32, + "learning_rate": 3.41171440786661e-05, + "loss": 0.5905, + "step": 30920 + }, + { + "epoch": 1.32, + "learning_rate": 3.409576742197521e-05, + "loss": 0.5325, + "step": 30930 + }, + { + "epoch": 1.32, + "learning_rate": 3.4074390765284306e-05, + "loss": 0.5404, + "step": 30940 + }, + { + "epoch": 1.32, + "learning_rate": 3.405301410859342e-05, + "loss": 0.5671, + "step": 30950 + }, + { + "epoch": 1.32, + "learning_rate": 3.403163745190253e-05, + "loss": 0.5129, + "step": 30960 + }, + { + "epoch": 1.32, + "learning_rate": 3.4010260795211627e-05, + "loss": 0.5845, + "step": 30970 + }, + { + "epoch": 1.32, + "learning_rate": 3.398888413852074e-05, + "loss": 0.5284, + "step": 30980 + }, + { + "epoch": 1.32, + "learning_rate": 3.396750748182984e-05, + "loss": 0.5381, + "step": 30990 + }, + { + "epoch": 1.32, + "learning_rate": 3.394613082513895e-05, + "loss": 0.56, + "step": 31000 + }, + { + "epoch": 1.32, + "learning_rate": 3.392475416844806e-05, + "loss": 0.523, + "step": 31010 + }, + { + "epoch": 1.32, + "learning_rate": 3.390337751175716e-05, + "loss": 0.5794, + "step": 31020 + }, + { + "epoch": 1.32, + "learning_rate": 3.388200085506627e-05, + "loss": 0.5228, + "step": 31030 + }, + { + "epoch": 1.32, + "learning_rate": 3.386062419837537e-05, + "loss": 0.5308, + "step": 31040 + }, + { + "epoch": 1.32, + "learning_rate": 3.383924754168448e-05, + "loss": 0.5606, + "step": 31050 + }, + { + "epoch": 1.33, + "learning_rate": 3.3817870884993594e-05, + "loss": 0.5142, + "step": 31060 + }, + { + "epoch": 1.33, + "learning_rate": 3.379649422830269e-05, + "loss": 0.5742, + "step": 31070 + }, + { + "epoch": 1.33, + "learning_rate": 3.37751175716118e-05, + "loss": 0.5238, + "step": 31080 + }, + { + "epoch": 1.33, + "learning_rate": 3.375374091492091e-05, + "loss": 0.5374, + "step": 31090 + }, + { + "epoch": 1.33, + "learning_rate": 3.373236425823001e-05, + "loss": 0.5757, + "step": 31100 + }, + { + "epoch": 1.33, + "learning_rate": 3.3710987601539124e-05, + "loss": 0.514, + "step": 31110 + }, + { + "epoch": 1.33, + "learning_rate": 3.368961094484823e-05, + "loss": 0.5831, + "step": 31120 + }, + { + "epoch": 1.33, + "learning_rate": 3.366823428815733e-05, + "loss": 0.5306, + "step": 31130 + }, + { + "epoch": 1.33, + "learning_rate": 3.364685763146644e-05, + "loss": 0.5369, + "step": 31140 + }, + { + "epoch": 1.33, + "learning_rate": 3.362548097477555e-05, + "loss": 0.5649, + "step": 31150 + }, + { + "epoch": 1.33, + "learning_rate": 3.360410431808465e-05, + "loss": 0.5145, + "step": 31160 + }, + { + "epoch": 1.33, + "learning_rate": 3.358272766139376e-05, + "loss": 0.5787, + "step": 31170 + }, + { + "epoch": 1.33, + "learning_rate": 3.356135100470287e-05, + "loss": 0.5311, + "step": 31180 + }, + { + "epoch": 1.33, + "learning_rate": 3.353997434801197e-05, + "loss": 0.5353, + "step": 31190 + }, + { + "epoch": 1.33, + "learning_rate": 3.351859769132108e-05, + "loss": 0.5614, + "step": 31200 + }, + { + "epoch": 1.33, + "learning_rate": 3.349722103463018e-05, + "loss": 0.5189, + "step": 31210 + }, + { + "epoch": 1.33, + "learning_rate": 3.3475844377939294e-05, + "loss": 0.5776, + "step": 31220 + }, + { + "epoch": 1.33, + "learning_rate": 3.34544677212484e-05, + "loss": 0.5321, + "step": 31230 + }, + { + "epoch": 1.33, + "learning_rate": 3.34330910645575e-05, + "loss": 0.5382, + "step": 31240 + }, + { + "epoch": 1.33, + "learning_rate": 3.3411714407866614e-05, + "loss": 0.5576, + "step": 31250 + }, + { + "epoch": 1.33, + "learning_rate": 3.339033775117572e-05, + "loss": 0.5191, + "step": 31260 + }, + { + "epoch": 1.33, + "learning_rate": 3.336896109448482e-05, + "loss": 0.5794, + "step": 31270 + }, + { + "epoch": 1.33, + "learning_rate": 3.3347584437793934e-05, + "loss": 0.5326, + "step": 31280 + }, + { + "epoch": 1.33, + "learning_rate": 3.332620778110304e-05, + "loss": 0.5384, + "step": 31290 + }, + { + "epoch": 1.34, + "learning_rate": 3.330483112441214e-05, + "loss": 0.5657, + "step": 31300 + }, + { + "epoch": 1.34, + "learning_rate": 3.328345446772125e-05, + "loss": 0.5207, + "step": 31310 + }, + { + "epoch": 1.34, + "learning_rate": 3.326207781103036e-05, + "loss": 0.5721, + "step": 31320 + }, + { + "epoch": 1.34, + "learning_rate": 3.3240701154339464e-05, + "loss": 0.5246, + "step": 31330 + }, + { + "epoch": 1.34, + "learning_rate": 3.321932449764857e-05, + "loss": 0.5388, + "step": 31340 + }, + { + "epoch": 1.34, + "learning_rate": 3.319794784095768e-05, + "loss": 0.5709, + "step": 31350 + }, + { + "epoch": 1.34, + "learning_rate": 3.317657118426678e-05, + "loss": 0.5168, + "step": 31360 + }, + { + "epoch": 1.34, + "learning_rate": 3.315519452757589e-05, + "loss": 0.587, + "step": 31370 + }, + { + "epoch": 1.34, + "learning_rate": 3.3133817870885e-05, + "loss": 0.5253, + "step": 31380 + }, + { + "epoch": 1.34, + "learning_rate": 3.3112441214194104e-05, + "loss": 0.5419, + "step": 31390 + }, + { + "epoch": 1.34, + "learning_rate": 3.309106455750321e-05, + "loss": 0.5612, + "step": 31400 + }, + { + "epoch": 1.34, + "learning_rate": 3.306968790081231e-05, + "loss": 0.5262, + "step": 31410 + }, + { + "epoch": 1.34, + "learning_rate": 3.3048311244121424e-05, + "loss": 0.5812, + "step": 31420 + }, + { + "epoch": 1.34, + "learning_rate": 3.302693458743052e-05, + "loss": 0.5289, + "step": 31430 + }, + { + "epoch": 1.34, + "learning_rate": 3.3005557930739633e-05, + "loss": 0.5448, + "step": 31440 + }, + { + "epoch": 1.34, + "learning_rate": 3.2984181274048745e-05, + "loss": 0.5624, + "step": 31450 + }, + { + "epoch": 1.34, + "learning_rate": 3.296280461735784e-05, + "loss": 0.5226, + "step": 31460 + }, + { + "epoch": 1.34, + "learning_rate": 3.2941427960666954e-05, + "loss": 0.5722, + "step": 31470 + }, + { + "epoch": 1.34, + "learning_rate": 3.292005130397606e-05, + "loss": 0.5289, + "step": 31480 + }, + { + "epoch": 1.34, + "learning_rate": 3.289867464728516e-05, + "loss": 0.5348, + "step": 31490 + }, + { + "epoch": 1.34, + "learning_rate": 3.2877297990594274e-05, + "loss": 0.5652, + "step": 31500 + }, + { + "epoch": 1.34, + "learning_rate": 3.285592133390338e-05, + "loss": 0.5143, + "step": 31510 + }, + { + "epoch": 1.34, + "learning_rate": 3.283454467721249e-05, + "loss": 0.5674, + "step": 31520 + }, + { + "epoch": 1.35, + "learning_rate": 3.281316802052159e-05, + "loss": 0.5198, + "step": 31530 + }, + { + "epoch": 1.35, + "learning_rate": 3.27917913638307e-05, + "loss": 0.5416, + "step": 31540 + }, + { + "epoch": 1.35, + "learning_rate": 3.277041470713981e-05, + "loss": 0.5736, + "step": 31550 + }, + { + "epoch": 1.35, + "learning_rate": 3.274903805044891e-05, + "loss": 0.5047, + "step": 31560 + }, + { + "epoch": 1.35, + "learning_rate": 3.272766139375802e-05, + "loss": 0.58, + "step": 31570 + }, + { + "epoch": 1.35, + "learning_rate": 3.2706284737067124e-05, + "loss": 0.5262, + "step": 31580 + }, + { + "epoch": 1.35, + "learning_rate": 3.268490808037623e-05, + "loss": 0.5333, + "step": 31590 + }, + { + "epoch": 1.35, + "learning_rate": 3.266353142368534e-05, + "loss": 0.5536, + "step": 31600 + }, + { + "epoch": 1.35, + "learning_rate": 3.2642154766994444e-05, + "loss": 0.519, + "step": 31610 + }, + { + "epoch": 1.35, + "learning_rate": 3.262077811030355e-05, + "loss": 0.57, + "step": 31620 + }, + { + "epoch": 1.35, + "learning_rate": 3.259940145361265e-05, + "loss": 0.5244, + "step": 31630 + }, + { + "epoch": 1.35, + "learning_rate": 3.2578024796921764e-05, + "loss": 0.5211, + "step": 31640 + }, + { + "epoch": 1.35, + "learning_rate": 3.255664814023087e-05, + "loss": 0.5688, + "step": 31650 + }, + { + "epoch": 1.35, + "learning_rate": 3.253527148353997e-05, + "loss": 0.5162, + "step": 31660 + }, + { + "epoch": 1.35, + "learning_rate": 3.2513894826849085e-05, + "loss": 0.5749, + "step": 31670 + }, + { + "epoch": 1.35, + "learning_rate": 3.249251817015819e-05, + "loss": 0.5242, + "step": 31680 + }, + { + "epoch": 1.35, + "learning_rate": 3.2471141513467294e-05, + "loss": 0.5234, + "step": 31690 + }, + { + "epoch": 1.35, + "learning_rate": 3.24497648567764e-05, + "loss": 0.5676, + "step": 31700 + }, + { + "epoch": 1.35, + "learning_rate": 3.242838820008551e-05, + "loss": 0.5186, + "step": 31710 + }, + { + "epoch": 1.35, + "learning_rate": 3.2407011543394614e-05, + "loss": 0.5789, + "step": 31720 + }, + { + "epoch": 1.35, + "learning_rate": 3.238563488670372e-05, + "loss": 0.5277, + "step": 31730 + }, + { + "epoch": 1.35, + "learning_rate": 3.236425823001283e-05, + "loss": 0.5412, + "step": 31740 + }, + { + "epoch": 1.35, + "learning_rate": 3.2342881573321934e-05, + "loss": 0.5709, + "step": 31750 + }, + { + "epoch": 1.35, + "learning_rate": 3.232150491663104e-05, + "loss": 0.5199, + "step": 31760 + }, + { + "epoch": 1.36, + "learning_rate": 3.230012825994015e-05, + "loss": 0.5772, + "step": 31770 + }, + { + "epoch": 1.36, + "learning_rate": 3.2278751603249255e-05, + "loss": 0.5289, + "step": 31780 + }, + { + "epoch": 1.36, + "learning_rate": 3.225737494655836e-05, + "loss": 0.5375, + "step": 31790 + }, + { + "epoch": 1.36, + "learning_rate": 3.2235998289867464e-05, + "loss": 0.5603, + "step": 31800 + }, + { + "epoch": 1.36, + "learning_rate": 3.2214621633176575e-05, + "loss": 0.5236, + "step": 31810 + }, + { + "epoch": 1.36, + "learning_rate": 3.219324497648568e-05, + "loss": 0.5784, + "step": 31820 + }, + { + "epoch": 1.36, + "learning_rate": 3.2171868319794784e-05, + "loss": 0.5291, + "step": 31830 + }, + { + "epoch": 1.36, + "learning_rate": 3.2150491663103895e-05, + "loss": 0.5332, + "step": 31840 + }, + { + "epoch": 1.36, + "learning_rate": 3.2129115006413e-05, + "loss": 0.5531, + "step": 31850 + }, + { + "epoch": 1.36, + "learning_rate": 3.2107738349722104e-05, + "loss": 0.5193, + "step": 31860 + }, + { + "epoch": 1.36, + "learning_rate": 3.2086361693031216e-05, + "loss": 0.5864, + "step": 31870 + }, + { + "epoch": 1.36, + "learning_rate": 3.206498503634032e-05, + "loss": 0.5257, + "step": 31880 + }, + { + "epoch": 1.36, + "learning_rate": 3.2043608379649425e-05, + "loss": 0.5277, + "step": 31890 + }, + { + "epoch": 1.36, + "learning_rate": 3.202223172295853e-05, + "loss": 0.5615, + "step": 31900 + }, + { + "epoch": 1.36, + "learning_rate": 3.200085506626764e-05, + "loss": 0.5131, + "step": 31910 + }, + { + "epoch": 1.36, + "learning_rate": 3.1979478409576745e-05, + "loss": 0.5847, + "step": 31920 + }, + { + "epoch": 1.36, + "learning_rate": 3.195810175288585e-05, + "loss": 0.5307, + "step": 31930 + }, + { + "epoch": 1.36, + "learning_rate": 3.193672509619496e-05, + "loss": 0.5336, + "step": 31940 + }, + { + "epoch": 1.36, + "learning_rate": 3.191534843950406e-05, + "loss": 0.5623, + "step": 31950 + }, + { + "epoch": 1.36, + "learning_rate": 3.189397178281317e-05, + "loss": 0.5168, + "step": 31960 + }, + { + "epoch": 1.36, + "learning_rate": 3.1872595126122274e-05, + "loss": 0.5748, + "step": 31970 + }, + { + "epoch": 1.36, + "learning_rate": 3.185121846943138e-05, + "loss": 0.5342, + "step": 31980 + }, + { + "epoch": 1.36, + "learning_rate": 3.182984181274049e-05, + "loss": 0.5413, + "step": 31990 + }, + { + "epoch": 1.37, + "learning_rate": 3.1808465156049594e-05, + "loss": 0.5689, + "step": 32000 + }, + { + "epoch": 1.37, + "learning_rate": 3.1787088499358706e-05, + "loss": 0.5097, + "step": 32010 + }, + { + "epoch": 1.37, + "learning_rate": 3.1765711842667804e-05, + "loss": 0.5798, + "step": 32020 + }, + { + "epoch": 1.37, + "learning_rate": 3.1744335185976915e-05, + "loss": 0.5389, + "step": 32030 + }, + { + "epoch": 1.37, + "learning_rate": 3.1722958529286026e-05, + "loss": 0.5394, + "step": 32040 + }, + { + "epoch": 1.37, + "learning_rate": 3.1701581872595124e-05, + "loss": 0.5643, + "step": 32050 + }, + { + "epoch": 1.37, + "learning_rate": 3.1680205215904235e-05, + "loss": 0.513, + "step": 32060 + }, + { + "epoch": 1.37, + "learning_rate": 3.165882855921334e-05, + "loss": 0.5777, + "step": 32070 + }, + { + "epoch": 1.37, + "learning_rate": 3.1637451902522444e-05, + "loss": 0.5335, + "step": 32080 + }, + { + "epoch": 1.37, + "learning_rate": 3.1616075245831555e-05, + "loss": 0.5365, + "step": 32090 + }, + { + "epoch": 1.37, + "learning_rate": 3.159469858914066e-05, + "loss": 0.5718, + "step": 32100 + }, + { + "epoch": 1.37, + "learning_rate": 3.1573321932449764e-05, + "loss": 0.5076, + "step": 32110 + }, + { + "epoch": 1.37, + "learning_rate": 3.155194527575887e-05, + "loss": 0.5851, + "step": 32120 + }, + { + "epoch": 1.37, + "learning_rate": 3.153056861906798e-05, + "loss": 0.528, + "step": 32130 + }, + { + "epoch": 1.37, + "learning_rate": 3.150919196237709e-05, + "loss": 0.5316, + "step": 32140 + }, + { + "epoch": 1.37, + "learning_rate": 3.148781530568619e-05, + "loss": 0.5575, + "step": 32150 + }, + { + "epoch": 1.37, + "learning_rate": 3.14664386489953e-05, + "loss": 0.5165, + "step": 32160 + }, + { + "epoch": 1.37, + "learning_rate": 3.1445061992304405e-05, + "loss": 0.5719, + "step": 32170 + }, + { + "epoch": 1.37, + "learning_rate": 3.142368533561351e-05, + "loss": 0.5301, + "step": 32180 + }, + { + "epoch": 1.37, + "learning_rate": 3.140230867892262e-05, + "loss": 0.5373, + "step": 32190 + }, + { + "epoch": 1.37, + "learning_rate": 3.1380932022231725e-05, + "loss": 0.5671, + "step": 32200 + }, + { + "epoch": 1.37, + "learning_rate": 3.135955536554083e-05, + "loss": 0.5187, + "step": 32210 + }, + { + "epoch": 1.37, + "learning_rate": 3.1338178708849934e-05, + "loss": 0.5788, + "step": 32220 + }, + { + "epoch": 1.37, + "learning_rate": 3.1316802052159046e-05, + "loss": 0.5255, + "step": 32230 + }, + { + "epoch": 1.38, + "learning_rate": 3.129542539546815e-05, + "loss": 0.5345, + "step": 32240 + }, + { + "epoch": 1.38, + "learning_rate": 3.1274048738777255e-05, + "loss": 0.566, + "step": 32250 + }, + { + "epoch": 1.38, + "learning_rate": 3.1252672082086366e-05, + "loss": 0.5161, + "step": 32260 + }, + { + "epoch": 1.38, + "learning_rate": 3.123129542539547e-05, + "loss": 0.5795, + "step": 32270 + }, + { + "epoch": 1.38, + "learning_rate": 3.1209918768704575e-05, + "loss": 0.5289, + "step": 32280 + }, + { + "epoch": 1.38, + "learning_rate": 3.118854211201368e-05, + "loss": 0.5391, + "step": 32290 + }, + { + "epoch": 1.38, + "learning_rate": 3.116716545532279e-05, + "loss": 0.5624, + "step": 32300 + }, + { + "epoch": 1.38, + "learning_rate": 3.1145788798631895e-05, + "loss": 0.5146, + "step": 32310 + }, + { + "epoch": 1.38, + "learning_rate": 3.1124412141941e-05, + "loss": 0.5868, + "step": 32320 + }, + { + "epoch": 1.38, + "learning_rate": 3.110303548525011e-05, + "loss": 0.5275, + "step": 32330 + }, + { + "epoch": 1.38, + "learning_rate": 3.1081658828559216e-05, + "loss": 0.5313, + "step": 32340 + }, + { + "epoch": 1.38, + "learning_rate": 3.106028217186832e-05, + "loss": 0.5648, + "step": 32350 + }, + { + "epoch": 1.38, + "learning_rate": 3.103890551517743e-05, + "loss": 0.5076, + "step": 32360 + }, + { + "epoch": 1.38, + "learning_rate": 3.1017528858486536e-05, + "loss": 0.5637, + "step": 32370 + }, + { + "epoch": 1.38, + "learning_rate": 3.099615220179564e-05, + "loss": 0.5254, + "step": 32380 + }, + { + "epoch": 1.38, + "learning_rate": 3.0974775545104745e-05, + "loss": 0.526, + "step": 32390 + }, + { + "epoch": 1.38, + "learning_rate": 3.0953398888413856e-05, + "loss": 0.5633, + "step": 32400 + }, + { + "epoch": 1.38, + "learning_rate": 3.093202223172296e-05, + "loss": 0.5195, + "step": 32410 + }, + { + "epoch": 1.38, + "learning_rate": 3.0910645575032065e-05, + "loss": 0.575, + "step": 32420 + }, + { + "epoch": 1.38, + "learning_rate": 3.0889268918341177e-05, + "loss": 0.5296, + "step": 32430 + }, + { + "epoch": 1.38, + "learning_rate": 3.0867892261650274e-05, + "loss": 0.539, + "step": 32440 + }, + { + "epoch": 1.38, + "learning_rate": 3.0846515604959386e-05, + "loss": 0.5581, + "step": 32450 + }, + { + "epoch": 1.38, + "learning_rate": 3.08251389482685e-05, + "loss": 0.5106, + "step": 32460 + }, + { + "epoch": 1.39, + "learning_rate": 3.08037622915776e-05, + "loss": 0.5786, + "step": 32470 + }, + { + "epoch": 1.39, + "learning_rate": 3.0782385634886706e-05, + "loss": 0.5268, + "step": 32480 + }, + { + "epoch": 1.39, + "learning_rate": 3.076100897819581e-05, + "loss": 0.5383, + "step": 32490 + }, + { + "epoch": 1.39, + "learning_rate": 3.073963232150492e-05, + "loss": 0.5595, + "step": 32500 + }, + { + "epoch": 1.39, + "learning_rate": 3.071825566481402e-05, + "loss": 0.5094, + "step": 32510 + }, + { + "epoch": 1.39, + "learning_rate": 3.069687900812313e-05, + "loss": 0.5772, + "step": 32520 + }, + { + "epoch": 1.39, + "learning_rate": 3.067550235143224e-05, + "loss": 0.5309, + "step": 32530 + }, + { + "epoch": 1.39, + "learning_rate": 3.065412569474134e-05, + "loss": 0.5325, + "step": 32540 + }, + { + "epoch": 1.39, + "learning_rate": 3.063274903805045e-05, + "loss": 0.5613, + "step": 32550 + }, + { + "epoch": 1.39, + "learning_rate": 3.0611372381359556e-05, + "loss": 0.5116, + "step": 32560 + }, + { + "epoch": 1.39, + "learning_rate": 3.058999572466866e-05, + "loss": 0.569, + "step": 32570 + }, + { + "epoch": 1.39, + "learning_rate": 3.056861906797777e-05, + "loss": 0.5261, + "step": 32580 + }, + { + "epoch": 1.39, + "learning_rate": 3.0547242411286876e-05, + "loss": 0.5367, + "step": 32590 + }, + { + "epoch": 1.39, + "learning_rate": 3.052586575459598e-05, + "loss": 0.5703, + "step": 32600 + }, + { + "epoch": 1.39, + "learning_rate": 3.0504489097905088e-05, + "loss": 0.5205, + "step": 32610 + }, + { + "epoch": 1.39, + "learning_rate": 3.0483112441214196e-05, + "loss": 0.5781, + "step": 32620 + }, + { + "epoch": 1.39, + "learning_rate": 3.0461735784523304e-05, + "loss": 0.5207, + "step": 32630 + }, + { + "epoch": 1.39, + "learning_rate": 3.044035912783241e-05, + "loss": 0.5354, + "step": 32640 + }, + { + "epoch": 1.39, + "learning_rate": 3.0418982471141516e-05, + "loss": 0.5669, + "step": 32650 + }, + { + "epoch": 1.39, + "learning_rate": 3.039760581445062e-05, + "loss": 0.5098, + "step": 32660 + }, + { + "epoch": 1.39, + "learning_rate": 3.037622915775973e-05, + "loss": 0.569, + "step": 32670 + }, + { + "epoch": 1.39, + "learning_rate": 3.0354852501068837e-05, + "loss": 0.5232, + "step": 32680 + }, + { + "epoch": 1.39, + "learning_rate": 3.033347584437794e-05, + "loss": 0.5257, + "step": 32690 + }, + { + "epoch": 1.4, + "learning_rate": 3.031209918768705e-05, + "loss": 0.5668, + "step": 32700 + }, + { + "epoch": 1.4, + "learning_rate": 3.029072253099615e-05, + "loss": 0.5144, + "step": 32710 + }, + { + "epoch": 1.4, + "learning_rate": 3.026934587430526e-05, + "loss": 0.5834, + "step": 32720 + }, + { + "epoch": 1.4, + "learning_rate": 3.024796921761437e-05, + "loss": 0.5206, + "step": 32730 + }, + { + "epoch": 1.4, + "learning_rate": 3.0226592560923474e-05, + "loss": 0.5319, + "step": 32740 + }, + { + "epoch": 1.4, + "learning_rate": 3.0205215904232582e-05, + "loss": 0.5594, + "step": 32750 + }, + { + "epoch": 1.4, + "learning_rate": 3.0183839247541683e-05, + "loss": 0.5162, + "step": 32760 + }, + { + "epoch": 1.4, + "learning_rate": 3.0162462590850794e-05, + "loss": 0.578, + "step": 32770 + }, + { + "epoch": 1.4, + "learning_rate": 3.0141085934159895e-05, + "loss": 0.5254, + "step": 32780 + }, + { + "epoch": 1.4, + "learning_rate": 3.0119709277469003e-05, + "loss": 0.5366, + "step": 32790 + }, + { + "epoch": 1.4, + "learning_rate": 3.0098332620778115e-05, + "loss": 0.5704, + "step": 32800 + }, + { + "epoch": 1.4, + "learning_rate": 3.0076955964087216e-05, + "loss": 0.5144, + "step": 32810 + }, + { + "epoch": 1.4, + "learning_rate": 3.0055579307396327e-05, + "loss": 0.5798, + "step": 32820 + }, + { + "epoch": 1.4, + "learning_rate": 3.0034202650705428e-05, + "loss": 0.5268, + "step": 32830 + }, + { + "epoch": 1.4, + "learning_rate": 3.0012825994014536e-05, + "loss": 0.5343, + "step": 32840 + }, + { + "epoch": 1.4, + "learning_rate": 2.9991449337323647e-05, + "loss": 0.5715, + "step": 32850 + }, + { + "epoch": 1.4, + "learning_rate": 2.997007268063275e-05, + "loss": 0.5162, + "step": 32860 + }, + { + "epoch": 1.4, + "learning_rate": 2.9948696023941856e-05, + "loss": 0.5786, + "step": 32870 + }, + { + "epoch": 1.4, + "learning_rate": 2.992731936725096e-05, + "loss": 0.5312, + "step": 32880 + }, + { + "epoch": 1.4, + "learning_rate": 2.990594271056007e-05, + "loss": 0.5359, + "step": 32890 + }, + { + "epoch": 1.4, + "learning_rate": 2.988456605386918e-05, + "loss": 0.5594, + "step": 32900 + }, + { + "epoch": 1.4, + "learning_rate": 2.986318939717828e-05, + "loss": 0.5146, + "step": 32910 + }, + { + "epoch": 1.4, + "learning_rate": 2.984181274048739e-05, + "loss": 0.5782, + "step": 32920 + }, + { + "epoch": 1.4, + "learning_rate": 2.9820436083796494e-05, + "loss": 0.5227, + "step": 32930 + }, + { + "epoch": 1.41, + "learning_rate": 2.97990594271056e-05, + "loss": 0.5331, + "step": 32940 + }, + { + "epoch": 1.41, + "learning_rate": 2.977768277041471e-05, + "loss": 0.5606, + "step": 32950 + }, + { + "epoch": 1.41, + "learning_rate": 2.9756306113723814e-05, + "loss": 0.5165, + "step": 32960 + }, + { + "epoch": 1.41, + "learning_rate": 2.9734929457032922e-05, + "loss": 0.5736, + "step": 32970 + }, + { + "epoch": 1.41, + "learning_rate": 2.9713552800342026e-05, + "loss": 0.5159, + "step": 32980 + }, + { + "epoch": 1.41, + "learning_rate": 2.9692176143651134e-05, + "loss": 0.5341, + "step": 32990 + }, + { + "epoch": 1.41, + "learning_rate": 2.9670799486960242e-05, + "loss": 0.5564, + "step": 33000 + }, + { + "epoch": 1.41, + "learning_rate": 2.9649422830269347e-05, + "loss": 0.5148, + "step": 33010 + }, + { + "epoch": 1.41, + "learning_rate": 2.9628046173578454e-05, + "loss": 0.5668, + "step": 33020 + }, + { + "epoch": 1.41, + "learning_rate": 2.960666951688756e-05, + "loss": 0.531, + "step": 33030 + }, + { + "epoch": 1.41, + "learning_rate": 2.9585292860196667e-05, + "loss": 0.5342, + "step": 33040 + }, + { + "epoch": 1.41, + "learning_rate": 2.956391620350577e-05, + "loss": 0.5591, + "step": 33050 + }, + { + "epoch": 1.41, + "learning_rate": 2.954253954681488e-05, + "loss": 0.504, + "step": 33060 + }, + { + "epoch": 1.41, + "learning_rate": 2.9521162890123987e-05, + "loss": 0.5767, + "step": 33070 + }, + { + "epoch": 1.41, + "learning_rate": 2.9499786233433092e-05, + "loss": 0.5295, + "step": 33080 + }, + { + "epoch": 1.41, + "learning_rate": 2.94784095767422e-05, + "loss": 0.5222, + "step": 33090 + }, + { + "epoch": 1.41, + "learning_rate": 2.9457032920051304e-05, + "loss": 0.5504, + "step": 33100 + }, + { + "epoch": 1.41, + "learning_rate": 2.9435656263360412e-05, + "loss": 0.5076, + "step": 33110 + }, + { + "epoch": 1.41, + "learning_rate": 2.941427960666952e-05, + "loss": 0.5766, + "step": 33120 + }, + { + "epoch": 1.41, + "learning_rate": 2.9392902949978624e-05, + "loss": 0.5286, + "step": 33130 + }, + { + "epoch": 1.41, + "learning_rate": 2.9371526293287732e-05, + "loss": 0.543, + "step": 33140 + }, + { + "epoch": 1.41, + "learning_rate": 2.9350149636596837e-05, + "loss": 0.5627, + "step": 33150 + }, + { + "epoch": 1.41, + "learning_rate": 2.9328772979905945e-05, + "loss": 0.5135, + "step": 33160 + }, + { + "epoch": 1.42, + "learning_rate": 2.9307396323215053e-05, + "loss": 0.5714, + "step": 33170 + }, + { + "epoch": 1.42, + "learning_rate": 2.9286019666524157e-05, + "loss": 0.5228, + "step": 33180 + }, + { + "epoch": 1.42, + "learning_rate": 2.9264643009833265e-05, + "loss": 0.5269, + "step": 33190 + }, + { + "epoch": 1.42, + "learning_rate": 2.9243266353142366e-05, + "loss": 0.5536, + "step": 33200 + }, + { + "epoch": 1.42, + "learning_rate": 2.9221889696451477e-05, + "loss": 0.5151, + "step": 33210 + }, + { + "epoch": 1.42, + "learning_rate": 2.9200513039760585e-05, + "loss": 0.573, + "step": 33220 + }, + { + "epoch": 1.42, + "learning_rate": 2.917913638306969e-05, + "loss": 0.5246, + "step": 33230 + }, + { + "epoch": 1.42, + "learning_rate": 2.9157759726378798e-05, + "loss": 0.5243, + "step": 33240 + }, + { + "epoch": 1.42, + "learning_rate": 2.91363830696879e-05, + "loss": 0.5613, + "step": 33250 + }, + { + "epoch": 1.42, + "learning_rate": 2.911500641299701e-05, + "loss": 0.5062, + "step": 33260 + }, + { + "epoch": 1.42, + "learning_rate": 2.9093629756306118e-05, + "loss": 0.5688, + "step": 33270 + }, + { + "epoch": 1.42, + "learning_rate": 2.9072253099615223e-05, + "loss": 0.5271, + "step": 33280 + }, + { + "epoch": 1.42, + "learning_rate": 2.905087644292433e-05, + "loss": 0.5306, + "step": 33290 + }, + { + "epoch": 1.42, + "learning_rate": 2.902949978623343e-05, + "loss": 0.5638, + "step": 33300 + }, + { + "epoch": 1.42, + "learning_rate": 2.9008123129542543e-05, + "loss": 0.5098, + "step": 33310 + }, + { + "epoch": 1.42, + "learning_rate": 2.8986746472851644e-05, + "loss": 0.5746, + "step": 33320 + }, + { + "epoch": 1.42, + "learning_rate": 2.8965369816160752e-05, + "loss": 0.5254, + "step": 33330 + }, + { + "epoch": 1.42, + "learning_rate": 2.8943993159469863e-05, + "loss": 0.5336, + "step": 33340 + }, + { + "epoch": 1.42, + "learning_rate": 2.8922616502778964e-05, + "loss": 0.564, + "step": 33350 + }, + { + "epoch": 1.42, + "learning_rate": 2.8901239846088076e-05, + "loss": 0.5116, + "step": 33360 + }, + { + "epoch": 1.42, + "learning_rate": 2.8879863189397177e-05, + "loss": 0.5709, + "step": 33370 + }, + { + "epoch": 1.42, + "learning_rate": 2.8858486532706285e-05, + "loss": 0.5363, + "step": 33380 + }, + { + "epoch": 1.42, + "learning_rate": 2.8837109876015396e-05, + "loss": 0.5337, + "step": 33390 + }, + { + "epoch": 1.42, + "learning_rate": 2.8815733219324497e-05, + "loss": 0.5698, + "step": 33400 + }, + { + "epoch": 1.43, + "learning_rate": 2.8794356562633605e-05, + "loss": 0.5178, + "step": 33410 + }, + { + "epoch": 1.43, + "learning_rate": 2.877297990594271e-05, + "loss": 0.5746, + "step": 33420 + }, + { + "epoch": 1.43, + "learning_rate": 2.8751603249251817e-05, + "loss": 0.5183, + "step": 33430 + }, + { + "epoch": 1.43, + "learning_rate": 2.873022659256093e-05, + "loss": 0.5262, + "step": 33440 + }, + { + "epoch": 1.43, + "learning_rate": 2.870884993587003e-05, + "loss": 0.5651, + "step": 33450 + }, + { + "epoch": 1.43, + "learning_rate": 2.8687473279179138e-05, + "loss": 0.5155, + "step": 33460 + }, + { + "epoch": 1.43, + "learning_rate": 2.8666096622488242e-05, + "loss": 0.5805, + "step": 33470 + }, + { + "epoch": 1.43, + "learning_rate": 2.864471996579735e-05, + "loss": 0.5248, + "step": 33480 + }, + { + "epoch": 1.43, + "learning_rate": 2.8623343309106458e-05, + "loss": 0.5344, + "step": 33490 + }, + { + "epoch": 1.43, + "learning_rate": 2.8601966652415562e-05, + "loss": 0.5593, + "step": 33500 + }, + { + "epoch": 1.43, + "learning_rate": 2.858058999572467e-05, + "loss": 0.5115, + "step": 33510 + }, + { + "epoch": 1.43, + "learning_rate": 2.8559213339033775e-05, + "loss": 0.5824, + "step": 33520 + }, + { + "epoch": 1.43, + "learning_rate": 2.8537836682342883e-05, + "loss": 0.5263, + "step": 33530 + }, + { + "epoch": 1.43, + "learning_rate": 2.851646002565199e-05, + "loss": 0.5296, + "step": 33540 + }, + { + "epoch": 1.43, + "learning_rate": 2.8495083368961095e-05, + "loss": 0.5629, + "step": 33550 + }, + { + "epoch": 1.43, + "learning_rate": 2.8473706712270203e-05, + "loss": 0.5067, + "step": 33560 + }, + { + "epoch": 1.43, + "learning_rate": 2.8452330055579308e-05, + "loss": 0.5842, + "step": 33570 + }, + { + "epoch": 1.43, + "learning_rate": 2.8430953398888416e-05, + "loss": 0.5331, + "step": 33580 + }, + { + "epoch": 1.43, + "learning_rate": 2.840957674219752e-05, + "loss": 0.533, + "step": 33590 + }, + { + "epoch": 1.43, + "learning_rate": 2.8388200085506628e-05, + "loss": 0.564, + "step": 33600 + }, + { + "epoch": 1.43, + "learning_rate": 2.8366823428815736e-05, + "loss": 0.512, + "step": 33610 + }, + { + "epoch": 1.43, + "learning_rate": 2.834544677212484e-05, + "loss": 0.5764, + "step": 33620 + }, + { + "epoch": 1.43, + "learning_rate": 2.8324070115433948e-05, + "loss": 0.5182, + "step": 33630 + }, + { + "epoch": 1.44, + "learning_rate": 2.8302693458743053e-05, + "loss": 0.5308, + "step": 33640 + }, + { + "epoch": 1.44, + "learning_rate": 2.828131680205216e-05, + "loss": 0.5644, + "step": 33650 + }, + { + "epoch": 1.44, + "learning_rate": 2.825994014536127e-05, + "loss": 0.5179, + "step": 33660 + }, + { + "epoch": 1.44, + "learning_rate": 2.8238563488670373e-05, + "loss": 0.5764, + "step": 33670 + }, + { + "epoch": 1.44, + "learning_rate": 2.821718683197948e-05, + "loss": 0.5243, + "step": 33680 + }, + { + "epoch": 1.44, + "learning_rate": 2.8195810175288585e-05, + "loss": 0.5313, + "step": 33690 + }, + { + "epoch": 1.44, + "learning_rate": 2.8174433518597693e-05, + "loss": 0.55, + "step": 33700 + }, + { + "epoch": 1.44, + "learning_rate": 2.81530568619068e-05, + "loss": 0.5085, + "step": 33710 + }, + { + "epoch": 1.44, + "learning_rate": 2.8131680205215906e-05, + "loss": 0.5797, + "step": 33720 + }, + { + "epoch": 1.44, + "learning_rate": 2.8110303548525014e-05, + "loss": 0.5286, + "step": 33730 + }, + { + "epoch": 1.44, + "learning_rate": 2.8088926891834115e-05, + "loss": 0.5265, + "step": 33740 + }, + { + "epoch": 1.44, + "learning_rate": 2.8067550235143226e-05, + "loss": 0.563, + "step": 33750 + }, + { + "epoch": 1.44, + "learning_rate": 2.8046173578452334e-05, + "loss": 0.5141, + "step": 33760 + }, + { + "epoch": 1.44, + "learning_rate": 2.802479692176144e-05, + "loss": 0.5775, + "step": 33770 + }, + { + "epoch": 1.44, + "learning_rate": 2.8003420265070546e-05, + "loss": 0.5321, + "step": 33780 + }, + { + "epoch": 1.44, + "learning_rate": 2.7982043608379648e-05, + "loss": 0.5328, + "step": 33790 + }, + { + "epoch": 1.44, + "learning_rate": 2.796066695168876e-05, + "loss": 0.5606, + "step": 33800 + }, + { + "epoch": 1.44, + "learning_rate": 2.7939290294997867e-05, + "loss": 0.5112, + "step": 33810 + }, + { + "epoch": 1.44, + "learning_rate": 2.7917913638306968e-05, + "loss": 0.5636, + "step": 33820 + }, + { + "epoch": 1.44, + "learning_rate": 2.789653698161608e-05, + "loss": 0.5164, + "step": 33830 + }, + { + "epoch": 1.44, + "learning_rate": 2.787516032492518e-05, + "loss": 0.5254, + "step": 33840 + }, + { + "epoch": 1.44, + "learning_rate": 2.785378366823429e-05, + "loss": 0.5659, + "step": 33850 + }, + { + "epoch": 1.44, + "learning_rate": 2.7832407011543393e-05, + "loss": 0.5072, + "step": 33860 + }, + { + "epoch": 1.44, + "learning_rate": 2.78110303548525e-05, + "loss": 0.5785, + "step": 33870 + }, + { + "epoch": 1.45, + "learning_rate": 2.7789653698161612e-05, + "loss": 0.5266, + "step": 33880 + }, + { + "epoch": 1.45, + "learning_rate": 2.7768277041470713e-05, + "loss": 0.5279, + "step": 33890 + }, + { + "epoch": 1.45, + "learning_rate": 2.7746900384779824e-05, + "loss": 0.5596, + "step": 33900 + }, + { + "epoch": 1.45, + "learning_rate": 2.7725523728088925e-05, + "loss": 0.5146, + "step": 33910 + }, + { + "epoch": 1.45, + "learning_rate": 2.7704147071398033e-05, + "loss": 0.5722, + "step": 33920 + }, + { + "epoch": 1.45, + "learning_rate": 2.7682770414707145e-05, + "loss": 0.5152, + "step": 33930 + }, + { + "epoch": 1.45, + "learning_rate": 2.7661393758016246e-05, + "loss": 0.5321, + "step": 33940 + }, + { + "epoch": 1.45, + "learning_rate": 2.7640017101325354e-05, + "loss": 0.5576, + "step": 33950 + }, + { + "epoch": 1.45, + "learning_rate": 2.7618640444634458e-05, + "loss": 0.5005, + "step": 33960 + }, + { + "epoch": 1.45, + "learning_rate": 2.7597263787943566e-05, + "loss": 0.5738, + "step": 33970 + }, + { + "epoch": 1.45, + "learning_rate": 2.7575887131252677e-05, + "loss": 0.5172, + "step": 33980 + }, + { + "epoch": 1.45, + "learning_rate": 2.755451047456178e-05, + "loss": 0.5321, + "step": 33990 + }, + { + "epoch": 1.45, + "learning_rate": 2.7533133817870886e-05, + "loss": 0.5577, + "step": 34000 + }, + { + "epoch": 1.45, + "learning_rate": 2.751175716117999e-05, + "loss": 0.5174, + "step": 34010 + }, + { + "epoch": 1.45, + "learning_rate": 2.74903805044891e-05, + "loss": 0.5681, + "step": 34020 + }, + { + "epoch": 1.45, + "learning_rate": 2.7469003847798207e-05, + "loss": 0.5274, + "step": 34030 + }, + { + "epoch": 1.45, + "learning_rate": 2.744762719110731e-05, + "loss": 0.5365, + "step": 34040 + }, + { + "epoch": 1.45, + "learning_rate": 2.742625053441642e-05, + "loss": 0.5657, + "step": 34050 + }, + { + "epoch": 1.45, + "learning_rate": 2.7404873877725524e-05, + "loss": 0.5049, + "step": 34060 + }, + { + "epoch": 1.45, + "learning_rate": 2.738349722103463e-05, + "loss": 0.58, + "step": 34070 + }, + { + "epoch": 1.45, + "learning_rate": 2.736212056434374e-05, + "loss": 0.5251, + "step": 34080 + }, + { + "epoch": 1.45, + "learning_rate": 2.7340743907652844e-05, + "loss": 0.5245, + "step": 34090 + }, + { + "epoch": 1.45, + "learning_rate": 2.7319367250961952e-05, + "loss": 0.5592, + "step": 34100 + }, + { + "epoch": 1.46, + "learning_rate": 2.7297990594271056e-05, + "loss": 0.5078, + "step": 34110 + }, + { + "epoch": 1.46, + "learning_rate": 2.7276613937580164e-05, + "loss": 0.5709, + "step": 34120 + }, + { + "epoch": 1.46, + "learning_rate": 2.725523728088927e-05, + "loss": 0.5226, + "step": 34130 + }, + { + "epoch": 1.46, + "learning_rate": 2.7233860624198377e-05, + "loss": 0.5295, + "step": 34140 + }, + { + "epoch": 1.46, + "learning_rate": 2.7212483967507484e-05, + "loss": 0.5535, + "step": 34150 + }, + { + "epoch": 1.46, + "learning_rate": 2.719110731081659e-05, + "loss": 0.5152, + "step": 34160 + }, + { + "epoch": 1.46, + "learning_rate": 2.7169730654125697e-05, + "loss": 0.5658, + "step": 34170 + }, + { + "epoch": 1.46, + "learning_rate": 2.71483539974348e-05, + "loss": 0.5261, + "step": 34180 + }, + { + "epoch": 1.46, + "learning_rate": 2.712697734074391e-05, + "loss": 0.5375, + "step": 34190 + }, + { + "epoch": 1.46, + "learning_rate": 2.7105600684053017e-05, + "loss": 0.5523, + "step": 34200 + }, + { + "epoch": 1.46, + "learning_rate": 2.708422402736212e-05, + "loss": 0.5182, + "step": 34210 + }, + { + "epoch": 1.46, + "learning_rate": 2.706284737067123e-05, + "loss": 0.578, + "step": 34220 + }, + { + "epoch": 1.46, + "learning_rate": 2.7041470713980334e-05, + "loss": 0.5227, + "step": 34230 + }, + { + "epoch": 1.46, + "learning_rate": 2.7020094057289442e-05, + "loss": 0.5195, + "step": 34240 + }, + { + "epoch": 1.46, + "learning_rate": 2.699871740059855e-05, + "loss": 0.5665, + "step": 34250 + }, + { + "epoch": 1.46, + "learning_rate": 2.6977340743907654e-05, + "loss": 0.5152, + "step": 34260 + }, + { + "epoch": 1.46, + "learning_rate": 2.6955964087216762e-05, + "loss": 0.5786, + "step": 34270 + }, + { + "epoch": 1.46, + "learning_rate": 2.6934587430525863e-05, + "loss": 0.5187, + "step": 34280 + }, + { + "epoch": 1.46, + "learning_rate": 2.6913210773834975e-05, + "loss": 0.5336, + "step": 34290 + }, + { + "epoch": 1.46, + "learning_rate": 2.6891834117144083e-05, + "loss": 0.5545, + "step": 34300 + }, + { + "epoch": 1.46, + "learning_rate": 2.6870457460453187e-05, + "loss": 0.5191, + "step": 34310 + }, + { + "epoch": 1.46, + "learning_rate": 2.6849080803762295e-05, + "loss": 0.5709, + "step": 34320 + }, + { + "epoch": 1.46, + "learning_rate": 2.6827704147071396e-05, + "loss": 0.5237, + "step": 34330 + }, + { + "epoch": 1.46, + "learning_rate": 2.6806327490380507e-05, + "loss": 0.531, + "step": 34340 + }, + { + "epoch": 1.47, + "learning_rate": 2.6784950833689615e-05, + "loss": 0.5636, + "step": 34350 + }, + { + "epoch": 1.47, + "learning_rate": 2.6763574176998716e-05, + "loss": 0.519, + "step": 34360 + }, + { + "epoch": 1.47, + "learning_rate": 2.6742197520307828e-05, + "loss": 0.5738, + "step": 34370 + }, + { + "epoch": 1.47, + "learning_rate": 2.672082086361693e-05, + "loss": 0.5341, + "step": 34380 + }, + { + "epoch": 1.47, + "learning_rate": 2.669944420692604e-05, + "loss": 0.5372, + "step": 34390 + }, + { + "epoch": 1.47, + "learning_rate": 2.667806755023514e-05, + "loss": 0.5558, + "step": 34400 + }, + { + "epoch": 1.47, + "learning_rate": 2.665669089354425e-05, + "loss": 0.5109, + "step": 34410 + }, + { + "epoch": 1.47, + "learning_rate": 2.663531423685336e-05, + "loss": 0.5724, + "step": 34420 + }, + { + "epoch": 1.47, + "learning_rate": 2.661393758016246e-05, + "loss": 0.5174, + "step": 34430 + }, + { + "epoch": 1.47, + "learning_rate": 2.659256092347157e-05, + "loss": 0.5345, + "step": 34440 + }, + { + "epoch": 1.47, + "learning_rate": 2.6571184266780674e-05, + "loss": 0.5633, + "step": 34450 + }, + { + "epoch": 1.47, + "learning_rate": 2.6549807610089782e-05, + "loss": 0.517, + "step": 34460 + }, + { + "epoch": 1.47, + "learning_rate": 2.6528430953398893e-05, + "loss": 0.5758, + "step": 34470 + }, + { + "epoch": 1.47, + "learning_rate": 2.6507054296707994e-05, + "loss": 0.5154, + "step": 34480 + }, + { + "epoch": 1.47, + "learning_rate": 2.6485677640017102e-05, + "loss": 0.534, + "step": 34490 + }, + { + "epoch": 1.47, + "learning_rate": 2.6464300983326207e-05, + "loss": 0.5624, + "step": 34500 + }, + { + "epoch": 1.47, + "learning_rate": 2.6442924326635315e-05, + "loss": 0.5152, + "step": 34510 + }, + { + "epoch": 1.47, + "learning_rate": 2.6421547669944426e-05, + "loss": 0.5726, + "step": 34520 + }, + { + "epoch": 1.47, + "learning_rate": 2.6400171013253527e-05, + "loss": 0.5289, + "step": 34530 + }, + { + "epoch": 1.47, + "learning_rate": 2.6378794356562635e-05, + "loss": 0.5271, + "step": 34540 + }, + { + "epoch": 1.47, + "learning_rate": 2.635741769987174e-05, + "loss": 0.5541, + "step": 34550 + }, + { + "epoch": 1.47, + "learning_rate": 2.6336041043180847e-05, + "loss": 0.5079, + "step": 34560 + }, + { + "epoch": 1.47, + "learning_rate": 2.6314664386489955e-05, + "loss": 0.5677, + "step": 34570 + }, + { + "epoch": 1.48, + "learning_rate": 2.629328772979906e-05, + "loss": 0.5311, + "step": 34580 + }, + { + "epoch": 1.48, + "learning_rate": 2.6271911073108168e-05, + "loss": 0.5256, + "step": 34590 + }, + { + "epoch": 1.48, + "learning_rate": 2.6250534416417272e-05, + "loss": 0.5566, + "step": 34600 + }, + { + "epoch": 1.48, + "learning_rate": 2.622915775972638e-05, + "loss": 0.5126, + "step": 34610 + }, + { + "epoch": 1.48, + "learning_rate": 2.6207781103035488e-05, + "loss": 0.5754, + "step": 34620 + }, + { + "epoch": 1.48, + "learning_rate": 2.6186404446344592e-05, + "loss": 0.5214, + "step": 34630 + }, + { + "epoch": 1.48, + "learning_rate": 2.61650277896537e-05, + "loss": 0.5213, + "step": 34640 + }, + { + "epoch": 1.48, + "learning_rate": 2.6143651132962805e-05, + "loss": 0.5616, + "step": 34650 + }, + { + "epoch": 1.48, + "learning_rate": 2.6122274476271913e-05, + "loss": 0.5136, + "step": 34660 + }, + { + "epoch": 1.48, + "learning_rate": 2.6100897819581017e-05, + "loss": 0.5736, + "step": 34670 + }, + { + "epoch": 1.48, + "learning_rate": 2.6079521162890125e-05, + "loss": 0.5249, + "step": 34680 + }, + { + "epoch": 1.48, + "learning_rate": 2.6058144506199233e-05, + "loss": 0.5254, + "step": 34690 + }, + { + "epoch": 1.48, + "learning_rate": 2.6036767849508338e-05, + "loss": 0.5509, + "step": 34700 + }, + { + "epoch": 1.48, + "learning_rate": 2.6015391192817445e-05, + "loss": 0.5115, + "step": 34710 + }, + { + "epoch": 1.48, + "learning_rate": 2.599401453612655e-05, + "loss": 0.5785, + "step": 34720 + }, + { + "epoch": 1.48, + "learning_rate": 2.5972637879435658e-05, + "loss": 0.525, + "step": 34730 + }, + { + "epoch": 1.48, + "learning_rate": 2.5951261222744766e-05, + "loss": 0.5265, + "step": 34740 + }, + { + "epoch": 1.48, + "learning_rate": 2.592988456605387e-05, + "loss": 0.5641, + "step": 34750 + }, + { + "epoch": 1.48, + "learning_rate": 2.5908507909362978e-05, + "loss": 0.5076, + "step": 34760 + }, + { + "epoch": 1.48, + "learning_rate": 2.5887131252672083e-05, + "loss": 0.5688, + "step": 34770 + }, + { + "epoch": 1.48, + "learning_rate": 2.586575459598119e-05, + "loss": 0.5368, + "step": 34780 + }, + { + "epoch": 1.48, + "learning_rate": 2.58443779392903e-05, + "loss": 0.5348, + "step": 34790 + }, + { + "epoch": 1.48, + "learning_rate": 2.5823001282599403e-05, + "loss": 0.5631, + "step": 34800 + }, + { + "epoch": 1.49, + "learning_rate": 2.580162462590851e-05, + "loss": 0.5054, + "step": 34810 + }, + { + "epoch": 1.49, + "learning_rate": 2.5780247969217612e-05, + "loss": 0.5692, + "step": 34820 + }, + { + "epoch": 1.49, + "learning_rate": 2.5758871312526723e-05, + "loss": 0.5298, + "step": 34830 + }, + { + "epoch": 1.49, + "learning_rate": 2.573749465583583e-05, + "loss": 0.5358, + "step": 34840 + }, + { + "epoch": 1.49, + "learning_rate": 2.5716117999144936e-05, + "loss": 0.557, + "step": 34850 + }, + { + "epoch": 1.49, + "learning_rate": 2.5694741342454044e-05, + "loss": 0.5155, + "step": 34860 + }, + { + "epoch": 1.49, + "learning_rate": 2.5673364685763145e-05, + "loss": 0.5732, + "step": 34870 + }, + { + "epoch": 1.49, + "learning_rate": 2.5651988029072256e-05, + "loss": 0.5141, + "step": 34880 + }, + { + "epoch": 1.49, + "learning_rate": 2.5630611372381364e-05, + "loss": 0.5327, + "step": 34890 + }, + { + "epoch": 1.49, + "learning_rate": 2.5609234715690465e-05, + "loss": 0.5547, + "step": 34900 + }, + { + "epoch": 1.49, + "learning_rate": 2.5587858058999576e-05, + "loss": 0.5121, + "step": 34910 + }, + { + "epoch": 1.49, + "learning_rate": 2.5566481402308677e-05, + "loss": 0.5634, + "step": 34920 + }, + { + "epoch": 1.49, + "learning_rate": 2.554510474561779e-05, + "loss": 0.5217, + "step": 34930 + }, + { + "epoch": 1.49, + "learning_rate": 2.552372808892689e-05, + "loss": 0.5303, + "step": 34940 + }, + { + "epoch": 1.49, + "learning_rate": 2.5502351432235998e-05, + "loss": 0.5592, + "step": 34950 + }, + { + "epoch": 1.49, + "learning_rate": 2.548097477554511e-05, + "loss": 0.5127, + "step": 34960 + }, + { + "epoch": 1.49, + "learning_rate": 2.545959811885421e-05, + "loss": 0.5743, + "step": 34970 + }, + { + "epoch": 1.49, + "learning_rate": 2.5438221462163318e-05, + "loss": 0.5207, + "step": 34980 + }, + { + "epoch": 1.49, + "learning_rate": 2.5416844805472423e-05, + "loss": 0.5298, + "step": 34990 + }, + { + "epoch": 1.49, + "learning_rate": 2.539546814878153e-05, + "loss": 0.554, + "step": 35000 + }, + { + "epoch": 1.49, + "learning_rate": 2.5374091492090642e-05, + "loss": 0.5228, + "step": 35010 + }, + { + "epoch": 1.49, + "learning_rate": 2.5352714835399743e-05, + "loss": 0.5705, + "step": 35020 + }, + { + "epoch": 1.49, + "learning_rate": 2.533133817870885e-05, + "loss": 0.5192, + "step": 35030 + }, + { + "epoch": 1.49, + "learning_rate": 2.5309961522017955e-05, + "loss": 0.5324, + "step": 35040 + }, + { + "epoch": 1.5, + "learning_rate": 2.5288584865327063e-05, + "loss": 0.563, + "step": 35050 + }, + { + "epoch": 1.5, + "learning_rate": 2.526720820863617e-05, + "loss": 0.5167, + "step": 35060 + }, + { + "epoch": 1.5, + "learning_rate": 2.5245831551945276e-05, + "loss": 0.5698, + "step": 35070 + }, + { + "epoch": 1.5, + "learning_rate": 2.5224454895254384e-05, + "loss": 0.5215, + "step": 35080 + }, + { + "epoch": 1.5, + "learning_rate": 2.5203078238563488e-05, + "loss": 0.5319, + "step": 35090 + }, + { + "epoch": 1.5, + "learning_rate": 2.5181701581872596e-05, + "loss": 0.561, + "step": 35100 + }, + { + "epoch": 1.5, + "learning_rate": 2.5160324925181704e-05, + "loss": 0.5154, + "step": 35110 + }, + { + "epoch": 1.5, + "learning_rate": 2.513894826849081e-05, + "loss": 0.5704, + "step": 35120 + }, + { + "epoch": 1.5, + "learning_rate": 2.5117571611799916e-05, + "loss": 0.5174, + "step": 35130 + }, + { + "epoch": 1.5, + "learning_rate": 2.509619495510902e-05, + "loss": 0.5323, + "step": 35140 + }, + { + "epoch": 1.5, + "learning_rate": 2.507481829841813e-05, + "loss": 0.5601, + "step": 35150 + }, + { + "epoch": 1.5, + "learning_rate": 2.5053441641727237e-05, + "loss": 0.5, + "step": 35160 + }, + { + "epoch": 1.5, + "learning_rate": 2.503206498503634e-05, + "loss": 0.564, + "step": 35170 + }, + { + "epoch": 1.5, + "learning_rate": 2.501068832834545e-05, + "loss": 0.5187, + "step": 35180 + }, + { + "epoch": 1.5, + "learning_rate": 2.4989311671654557e-05, + "loss": 0.5252, + "step": 35190 + }, + { + "epoch": 1.5, + "learning_rate": 2.496793501496366e-05, + "loss": 0.5566, + "step": 35200 + }, + { + "epoch": 1.5, + "learning_rate": 2.4946558358272766e-05, + "loss": 0.5152, + "step": 35210 + }, + { + "epoch": 1.5, + "learning_rate": 2.4925181701581874e-05, + "loss": 0.5704, + "step": 35220 + }, + { + "epoch": 1.5, + "learning_rate": 2.4903805044890978e-05, + "loss": 0.5178, + "step": 35230 + }, + { + "epoch": 1.5, + "learning_rate": 2.488242838820009e-05, + "loss": 0.5296, + "step": 35240 + }, + { + "epoch": 1.5, + "learning_rate": 2.4861051731509194e-05, + "loss": 0.5649, + "step": 35250 + }, + { + "epoch": 1.5, + "learning_rate": 2.48396750748183e-05, + "loss": 0.507, + "step": 35260 + }, + { + "epoch": 1.5, + "learning_rate": 2.4818298418127406e-05, + "loss": 0.5789, + "step": 35270 + }, + { + "epoch": 1.51, + "learning_rate": 2.479692176143651e-05, + "loss": 0.53, + "step": 35280 + }, + { + "epoch": 1.51, + "learning_rate": 2.477554510474562e-05, + "loss": 0.5291, + "step": 35290 + }, + { + "epoch": 1.51, + "learning_rate": 2.4754168448054727e-05, + "loss": 0.5581, + "step": 35300 + }, + { + "epoch": 1.51, + "learning_rate": 2.473279179136383e-05, + "loss": 0.5144, + "step": 35310 + }, + { + "epoch": 1.51, + "learning_rate": 2.471141513467294e-05, + "loss": 0.578, + "step": 35320 + }, + { + "epoch": 1.51, + "learning_rate": 2.4690038477982044e-05, + "loss": 0.5249, + "step": 35330 + }, + { + "epoch": 1.51, + "learning_rate": 2.466866182129115e-05, + "loss": 0.5376, + "step": 35340 + }, + { + "epoch": 1.51, + "learning_rate": 2.464728516460026e-05, + "loss": 0.5595, + "step": 35350 + }, + { + "epoch": 1.51, + "learning_rate": 2.4625908507909364e-05, + "loss": 0.5096, + "step": 35360 + }, + { + "epoch": 1.51, + "learning_rate": 2.4604531851218472e-05, + "loss": 0.5741, + "step": 35370 + }, + { + "epoch": 1.51, + "learning_rate": 2.4583155194527576e-05, + "loss": 0.5225, + "step": 35380 + }, + { + "epoch": 1.51, + "learning_rate": 2.4561778537836684e-05, + "loss": 0.5297, + "step": 35390 + }, + { + "epoch": 1.51, + "learning_rate": 2.454040188114579e-05, + "loss": 0.5614, + "step": 35400 + }, + { + "epoch": 1.51, + "learning_rate": 2.4519025224454897e-05, + "loss": 0.516, + "step": 35410 + }, + { + "epoch": 1.51, + "learning_rate": 2.4497648567764005e-05, + "loss": 0.5679, + "step": 35420 + }, + { + "epoch": 1.51, + "learning_rate": 2.447627191107311e-05, + "loss": 0.5222, + "step": 35430 + }, + { + "epoch": 1.51, + "learning_rate": 2.4454895254382214e-05, + "loss": 0.5355, + "step": 35440 + }, + { + "epoch": 1.51, + "learning_rate": 2.443351859769132e-05, + "loss": 0.5556, + "step": 35450 + }, + { + "epoch": 1.51, + "learning_rate": 2.441214194100043e-05, + "loss": 0.5109, + "step": 35460 + }, + { + "epoch": 1.51, + "learning_rate": 2.4390765284309537e-05, + "loss": 0.5739, + "step": 35470 + }, + { + "epoch": 1.51, + "learning_rate": 2.4369388627618642e-05, + "loss": 0.5287, + "step": 35480 + }, + { + "epoch": 1.51, + "learning_rate": 2.4348011970927746e-05, + "loss": 0.5242, + "step": 35490 + }, + { + "epoch": 1.51, + "learning_rate": 2.4326635314236854e-05, + "loss": 0.547, + "step": 35500 + }, + { + "epoch": 1.51, + "learning_rate": 2.4305258657545962e-05, + "loss": 0.4988, + "step": 35510 + }, + { + "epoch": 1.52, + "learning_rate": 2.4283882000855067e-05, + "loss": 0.5665, + "step": 35520 + }, + { + "epoch": 1.52, + "learning_rate": 2.4262505344164175e-05, + "loss": 0.5159, + "step": 35530 + }, + { + "epoch": 1.52, + "learning_rate": 2.424112868747328e-05, + "loss": 0.5279, + "step": 35540 + }, + { + "epoch": 1.52, + "learning_rate": 2.4219752030782387e-05, + "loss": 0.5495, + "step": 35550 + }, + { + "epoch": 1.52, + "learning_rate": 2.419837537409149e-05, + "loss": 0.5146, + "step": 35560 + }, + { + "epoch": 1.52, + "learning_rate": 2.41769987174006e-05, + "loss": 0.5709, + "step": 35570 + }, + { + "epoch": 1.52, + "learning_rate": 2.4155622060709707e-05, + "loss": 0.5151, + "step": 35580 + }, + { + "epoch": 1.52, + "learning_rate": 2.4134245404018812e-05, + "loss": 0.5375, + "step": 35590 + }, + { + "epoch": 1.52, + "learning_rate": 2.411286874732792e-05, + "loss": 0.5589, + "step": 35600 + }, + { + "epoch": 1.52, + "learning_rate": 2.4091492090637024e-05, + "loss": 0.5115, + "step": 35610 + }, + { + "epoch": 1.52, + "learning_rate": 2.4070115433946132e-05, + "loss": 0.5673, + "step": 35620 + }, + { + "epoch": 1.52, + "learning_rate": 2.404873877725524e-05, + "loss": 0.5278, + "step": 35630 + }, + { + "epoch": 1.52, + "learning_rate": 2.4027362120564345e-05, + "loss": 0.5312, + "step": 35640 + }, + { + "epoch": 1.52, + "learning_rate": 2.4005985463873452e-05, + "loss": 0.5608, + "step": 35650 + }, + { + "epoch": 1.52, + "learning_rate": 2.3984608807182557e-05, + "loss": 0.5019, + "step": 35660 + }, + { + "epoch": 1.52, + "learning_rate": 2.396323215049166e-05, + "loss": 0.5585, + "step": 35670 + }, + { + "epoch": 1.52, + "learning_rate": 2.3941855493800773e-05, + "loss": 0.5185, + "step": 35680 + }, + { + "epoch": 1.52, + "learning_rate": 2.3920478837109877e-05, + "loss": 0.5291, + "step": 35690 + }, + { + "epoch": 1.52, + "learning_rate": 2.3899102180418985e-05, + "loss": 0.5532, + "step": 35700 + }, + { + "epoch": 1.52, + "learning_rate": 2.387772552372809e-05, + "loss": 0.5016, + "step": 35710 + }, + { + "epoch": 1.52, + "learning_rate": 2.3856348867037194e-05, + "loss": 0.5744, + "step": 35720 + }, + { + "epoch": 1.52, + "learning_rate": 2.3834972210346305e-05, + "loss": 0.5189, + "step": 35730 + }, + { + "epoch": 1.52, + "learning_rate": 2.381359555365541e-05, + "loss": 0.5365, + "step": 35740 + }, + { + "epoch": 1.53, + "learning_rate": 2.3792218896964514e-05, + "loss": 0.5557, + "step": 35750 + }, + { + "epoch": 1.53, + "learning_rate": 2.3770842240273622e-05, + "loss": 0.5181, + "step": 35760 + }, + { + "epoch": 1.53, + "learning_rate": 2.3749465583582727e-05, + "loss": 0.5725, + "step": 35770 + }, + { + "epoch": 1.53, + "learning_rate": 2.3728088926891838e-05, + "loss": 0.5176, + "step": 35780 + }, + { + "epoch": 1.53, + "learning_rate": 2.3706712270200943e-05, + "loss": 0.5292, + "step": 35790 + }, + { + "epoch": 1.53, + "learning_rate": 2.3685335613510047e-05, + "loss": 0.5636, + "step": 35800 + }, + { + "epoch": 1.53, + "learning_rate": 2.3663958956819155e-05, + "loss": 0.5078, + "step": 35810 + }, + { + "epoch": 1.53, + "learning_rate": 2.364258230012826e-05, + "loss": 0.5733, + "step": 35820 + }, + { + "epoch": 1.53, + "learning_rate": 2.3621205643437368e-05, + "loss": 0.5194, + "step": 35830 + }, + { + "epoch": 1.53, + "learning_rate": 2.3599828986746475e-05, + "loss": 0.524, + "step": 35840 + }, + { + "epoch": 1.53, + "learning_rate": 2.357845233005558e-05, + "loss": 0.5647, + "step": 35850 + }, + { + "epoch": 1.53, + "learning_rate": 2.3557075673364688e-05, + "loss": 0.5045, + "step": 35860 + }, + { + "epoch": 1.53, + "learning_rate": 2.3535699016673792e-05, + "loss": 0.5716, + "step": 35870 + }, + { + "epoch": 1.53, + "learning_rate": 2.35143223599829e-05, + "loss": 0.5268, + "step": 35880 + }, + { + "epoch": 1.53, + "learning_rate": 2.3492945703292008e-05, + "loss": 0.5302, + "step": 35890 + }, + { + "epoch": 1.53, + "learning_rate": 2.3471569046601113e-05, + "loss": 0.561, + "step": 35900 + }, + { + "epoch": 1.53, + "learning_rate": 2.345019238991022e-05, + "loss": 0.5131, + "step": 35910 + }, + { + "epoch": 1.53, + "learning_rate": 2.3428815733219325e-05, + "loss": 0.566, + "step": 35920 + }, + { + "epoch": 1.53, + "learning_rate": 2.3407439076528433e-05, + "loss": 0.5198, + "step": 35930 + }, + { + "epoch": 1.53, + "learning_rate": 2.3386062419837537e-05, + "loss": 0.5239, + "step": 35940 + }, + { + "epoch": 1.53, + "learning_rate": 2.3364685763146645e-05, + "loss": 0.5678, + "step": 35950 + }, + { + "epoch": 1.53, + "learning_rate": 2.3343309106455753e-05, + "loss": 0.5085, + "step": 35960 + }, + { + "epoch": 1.53, + "learning_rate": 2.3321932449764858e-05, + "loss": 0.5696, + "step": 35970 + }, + { + "epoch": 1.53, + "learning_rate": 2.3300555793073962e-05, + "loss": 0.5329, + "step": 35980 + }, + { + "epoch": 1.54, + "learning_rate": 2.327917913638307e-05, + "loss": 0.5315, + "step": 35990 + }, + { + "epoch": 1.54, + "learning_rate": 2.3257802479692178e-05, + "loss": 0.5494, + "step": 36000 + }, + { + "epoch": 1.54, + "learning_rate": 2.3236425823001286e-05, + "loss": 0.5076, + "step": 36010 + }, + { + "epoch": 1.54, + "learning_rate": 2.321504916631039e-05, + "loss": 0.5642, + "step": 36020 + }, + { + "epoch": 1.54, + "learning_rate": 2.3193672509619495e-05, + "loss": 0.5173, + "step": 36030 + }, + { + "epoch": 1.54, + "learning_rate": 2.3172295852928603e-05, + "loss": 0.531, + "step": 36040 + }, + { + "epoch": 1.54, + "learning_rate": 2.315091919623771e-05, + "loss": 0.5557, + "step": 36050 + }, + { + "epoch": 1.54, + "learning_rate": 2.3129542539546815e-05, + "loss": 0.5031, + "step": 36060 + }, + { + "epoch": 1.54, + "learning_rate": 2.3108165882855923e-05, + "loss": 0.5672, + "step": 36070 + }, + { + "epoch": 1.54, + "learning_rate": 2.3086789226165028e-05, + "loss": 0.5232, + "step": 36080 + }, + { + "epoch": 1.54, + "learning_rate": 2.3065412569474136e-05, + "loss": 0.5402, + "step": 36090 + }, + { + "epoch": 1.54, + "learning_rate": 2.304403591278324e-05, + "loss": 0.5547, + "step": 36100 + }, + { + "epoch": 1.54, + "learning_rate": 2.3022659256092348e-05, + "loss": 0.5044, + "step": 36110 + }, + { + "epoch": 1.54, + "learning_rate": 2.3001282599401456e-05, + "loss": 0.5757, + "step": 36120 + }, + { + "epoch": 1.54, + "learning_rate": 2.297990594271056e-05, + "loss": 0.5126, + "step": 36130 + }, + { + "epoch": 1.54, + "learning_rate": 2.295852928601967e-05, + "loss": 0.5331, + "step": 36140 + }, + { + "epoch": 1.54, + "learning_rate": 2.2937152629328773e-05, + "loss": 0.5594, + "step": 36150 + }, + { + "epoch": 1.54, + "learning_rate": 2.291577597263788e-05, + "loss": 0.5237, + "step": 36160 + }, + { + "epoch": 1.54, + "learning_rate": 2.289439931594699e-05, + "loss": 0.5747, + "step": 36170 + }, + { + "epoch": 1.54, + "learning_rate": 2.2873022659256093e-05, + "loss": 0.523, + "step": 36180 + }, + { + "epoch": 1.54, + "learning_rate": 2.28516460025652e-05, + "loss": 0.534, + "step": 36190 + }, + { + "epoch": 1.54, + "learning_rate": 2.2830269345874306e-05, + "loss": 0.5549, + "step": 36200 + }, + { + "epoch": 1.54, + "learning_rate": 2.280889268918341e-05, + "loss": 0.5067, + "step": 36210 + }, + { + "epoch": 1.55, + "learning_rate": 2.278751603249252e-05, + "loss": 0.5756, + "step": 36220 + }, + { + "epoch": 1.55, + "learning_rate": 2.2766139375801626e-05, + "loss": 0.5255, + "step": 36230 + }, + { + "epoch": 1.55, + "learning_rate": 2.2744762719110734e-05, + "loss": 0.523, + "step": 36240 + }, + { + "epoch": 1.55, + "learning_rate": 2.2723386062419838e-05, + "loss": 0.5588, + "step": 36250 + }, + { + "epoch": 1.55, + "learning_rate": 2.2702009405728943e-05, + "loss": 0.5225, + "step": 36260 + }, + { + "epoch": 1.55, + "learning_rate": 2.2680632749038054e-05, + "loss": 0.5595, + "step": 36270 + }, + { + "epoch": 1.55, + "learning_rate": 2.265925609234716e-05, + "loss": 0.5207, + "step": 36280 + }, + { + "epoch": 1.55, + "learning_rate": 2.2637879435656263e-05, + "loss": 0.5341, + "step": 36290 + }, + { + "epoch": 1.55, + "learning_rate": 2.261650277896537e-05, + "loss": 0.5537, + "step": 36300 + }, + { + "epoch": 1.55, + "learning_rate": 2.2595126122274476e-05, + "loss": 0.5081, + "step": 36310 + }, + { + "epoch": 1.55, + "learning_rate": 2.2573749465583587e-05, + "loss": 0.5695, + "step": 36320 + }, + { + "epoch": 1.55, + "learning_rate": 2.255237280889269e-05, + "loss": 0.5212, + "step": 36330 + }, + { + "epoch": 1.55, + "learning_rate": 2.2530996152201796e-05, + "loss": 0.5268, + "step": 36340 + }, + { + "epoch": 1.55, + "learning_rate": 2.2509619495510904e-05, + "loss": 0.549, + "step": 36350 + }, + { + "epoch": 1.55, + "learning_rate": 2.2488242838820008e-05, + "loss": 0.51, + "step": 36360 + }, + { + "epoch": 1.55, + "learning_rate": 2.2466866182129116e-05, + "loss": 0.5736, + "step": 36370 + }, + { + "epoch": 1.55, + "learning_rate": 2.2445489525438224e-05, + "loss": 0.5176, + "step": 36380 + }, + { + "epoch": 1.55, + "learning_rate": 2.242411286874733e-05, + "loss": 0.5193, + "step": 36390 + }, + { + "epoch": 1.55, + "learning_rate": 2.2402736212056436e-05, + "loss": 0.567, + "step": 36400 + }, + { + "epoch": 1.55, + "learning_rate": 2.238135955536554e-05, + "loss": 0.5099, + "step": 36410 + }, + { + "epoch": 1.55, + "learning_rate": 2.235998289867465e-05, + "loss": 0.5755, + "step": 36420 + }, + { + "epoch": 1.55, + "learning_rate": 2.2338606241983757e-05, + "loss": 0.518, + "step": 36430 + }, + { + "epoch": 1.55, + "learning_rate": 2.231722958529286e-05, + "loss": 0.5304, + "step": 36440 + }, + { + "epoch": 1.56, + "learning_rate": 2.229585292860197e-05, + "loss": 0.5602, + "step": 36450 + }, + { + "epoch": 1.56, + "learning_rate": 2.2274476271911074e-05, + "loss": 0.5131, + "step": 36460 + }, + { + "epoch": 1.56, + "learning_rate": 2.2253099615220178e-05, + "loss": 0.5645, + "step": 36470 + }, + { + "epoch": 1.56, + "learning_rate": 2.2231722958529286e-05, + "loss": 0.5124, + "step": 36480 + }, + { + "epoch": 1.56, + "learning_rate": 2.2210346301838394e-05, + "loss": 0.5274, + "step": 36490 + }, + { + "epoch": 1.56, + "learning_rate": 2.2188969645147502e-05, + "loss": 0.5584, + "step": 36500 + }, + { + "epoch": 1.56, + "learning_rate": 2.2167592988456606e-05, + "loss": 0.5214, + "step": 36510 + }, + { + "epoch": 1.56, + "learning_rate": 2.214621633176571e-05, + "loss": 0.5685, + "step": 36520 + }, + { + "epoch": 1.56, + "learning_rate": 2.212483967507482e-05, + "loss": 0.529, + "step": 36530 + }, + { + "epoch": 1.56, + "learning_rate": 2.2103463018383927e-05, + "loss": 0.5272, + "step": 36540 + }, + { + "epoch": 1.56, + "learning_rate": 2.2082086361693035e-05, + "loss": 0.5577, + "step": 36550 + }, + { + "epoch": 1.56, + "learning_rate": 2.206070970500214e-05, + "loss": 0.509, + "step": 36560 + }, + { + "epoch": 1.56, + "learning_rate": 2.2039333048311244e-05, + "loss": 0.5645, + "step": 36570 + }, + { + "epoch": 1.56, + "learning_rate": 2.201795639162035e-05, + "loss": 0.5213, + "step": 36580 + }, + { + "epoch": 1.56, + "learning_rate": 2.199657973492946e-05, + "loss": 0.5372, + "step": 36590 + }, + { + "epoch": 1.56, + "learning_rate": 2.1975203078238564e-05, + "loss": 0.5591, + "step": 36600 + }, + { + "epoch": 1.56, + "learning_rate": 2.1953826421547672e-05, + "loss": 0.5049, + "step": 36610 + }, + { + "epoch": 1.56, + "learning_rate": 2.1932449764856776e-05, + "loss": 0.5707, + "step": 36620 + }, + { + "epoch": 1.56, + "learning_rate": 2.1911073108165884e-05, + "loss": 0.5234, + "step": 36630 + }, + { + "epoch": 1.56, + "learning_rate": 2.188969645147499e-05, + "loss": 0.5317, + "step": 36640 + }, + { + "epoch": 1.56, + "learning_rate": 2.1868319794784097e-05, + "loss": 0.5657, + "step": 36650 + }, + { + "epoch": 1.56, + "learning_rate": 2.1846943138093205e-05, + "loss": 0.5075, + "step": 36660 + }, + { + "epoch": 1.56, + "learning_rate": 2.182556648140231e-05, + "loss": 0.5682, + "step": 36670 + }, + { + "epoch": 1.56, + "learning_rate": 2.1804189824711417e-05, + "loss": 0.5286, + "step": 36680 + }, + { + "epoch": 1.57, + "learning_rate": 2.178281316802052e-05, + "loss": 0.5222, + "step": 36690 + }, + { + "epoch": 1.57, + "learning_rate": 2.176143651132963e-05, + "loss": 0.5531, + "step": 36700 + }, + { + "epoch": 1.57, + "learning_rate": 2.1740059854638737e-05, + "loss": 0.508, + "step": 36710 + }, + { + "epoch": 1.57, + "learning_rate": 2.1718683197947842e-05, + "loss": 0.5625, + "step": 36720 + }, + { + "epoch": 1.57, + "learning_rate": 2.169730654125695e-05, + "loss": 0.5214, + "step": 36730 + }, + { + "epoch": 1.57, + "learning_rate": 2.1675929884566054e-05, + "loss": 0.5263, + "step": 36740 + }, + { + "epoch": 1.57, + "learning_rate": 2.165455322787516e-05, + "loss": 0.5592, + "step": 36750 + }, + { + "epoch": 1.57, + "learning_rate": 2.163317657118427e-05, + "loss": 0.5034, + "step": 36760 + }, + { + "epoch": 1.57, + "learning_rate": 2.1611799914493374e-05, + "loss": 0.5541, + "step": 36770 + }, + { + "epoch": 1.57, + "learning_rate": 2.159042325780248e-05, + "loss": 0.5204, + "step": 36780 + }, + { + "epoch": 1.57, + "learning_rate": 2.1569046601111587e-05, + "loss": 0.5261, + "step": 36790 + }, + { + "epoch": 1.57, + "learning_rate": 2.154766994442069e-05, + "loss": 0.5638, + "step": 36800 + }, + { + "epoch": 1.57, + "learning_rate": 2.1526293287729803e-05, + "loss": 0.5069, + "step": 36810 + }, + { + "epoch": 1.57, + "learning_rate": 2.1504916631038907e-05, + "loss": 0.5717, + "step": 36820 + }, + { + "epoch": 1.57, + "learning_rate": 2.1483539974348012e-05, + "loss": 0.5233, + "step": 36830 + }, + { + "epoch": 1.57, + "learning_rate": 2.146216331765712e-05, + "loss": 0.5286, + "step": 36840 + }, + { + "epoch": 1.57, + "learning_rate": 2.1440786660966224e-05, + "loss": 0.5526, + "step": 36850 + }, + { + "epoch": 1.57, + "learning_rate": 2.1419410004275335e-05, + "loss": 0.5133, + "step": 36860 + }, + { + "epoch": 1.57, + "learning_rate": 2.139803334758444e-05, + "loss": 0.5688, + "step": 36870 + }, + { + "epoch": 1.57, + "learning_rate": 2.1376656690893544e-05, + "loss": 0.5229, + "step": 36880 + }, + { + "epoch": 1.57, + "learning_rate": 2.1355280034202652e-05, + "loss": 0.523, + "step": 36890 + }, + { + "epoch": 1.57, + "learning_rate": 2.1333903377511757e-05, + "loss": 0.5482, + "step": 36900 + }, + { + "epoch": 1.57, + "learning_rate": 2.1312526720820865e-05, + "loss": 0.5119, + "step": 36910 + }, + { + "epoch": 1.58, + "learning_rate": 2.1291150064129973e-05, + "loss": 0.5647, + "step": 36920 + }, + { + "epoch": 1.58, + "learning_rate": 2.1269773407439077e-05, + "loss": 0.5249, + "step": 36930 + }, + { + "epoch": 1.58, + "learning_rate": 2.1248396750748185e-05, + "loss": 0.5304, + "step": 36940 + }, + { + "epoch": 1.58, + "learning_rate": 2.122702009405729e-05, + "loss": 0.5616, + "step": 36950 + }, + { + "epoch": 1.58, + "learning_rate": 2.1205643437366397e-05, + "loss": 0.5084, + "step": 36960 + }, + { + "epoch": 1.58, + "learning_rate": 2.1184266780675505e-05, + "loss": 0.5712, + "step": 36970 + }, + { + "epoch": 1.58, + "learning_rate": 2.116289012398461e-05, + "loss": 0.5205, + "step": 36980 + }, + { + "epoch": 1.58, + "learning_rate": 2.1141513467293718e-05, + "loss": 0.5253, + "step": 36990 + }, + { + "epoch": 1.58, + "learning_rate": 2.1120136810602822e-05, + "loss": 0.5599, + "step": 37000 + }, + { + "epoch": 1.58, + "learning_rate": 2.1098760153911927e-05, + "loss": 0.4978, + "step": 37010 + }, + { + "epoch": 1.58, + "learning_rate": 2.1077383497221035e-05, + "loss": 0.572, + "step": 37020 + }, + { + "epoch": 1.58, + "learning_rate": 2.1056006840530143e-05, + "loss": 0.5189, + "step": 37030 + }, + { + "epoch": 1.58, + "learning_rate": 2.103463018383925e-05, + "loss": 0.5354, + "step": 37040 + }, + { + "epoch": 1.58, + "learning_rate": 2.1013253527148355e-05, + "loss": 0.5606, + "step": 37050 + }, + { + "epoch": 1.58, + "learning_rate": 2.099187687045746e-05, + "loss": 0.5094, + "step": 37060 + }, + { + "epoch": 1.58, + "learning_rate": 2.0970500213766567e-05, + "loss": 0.5725, + "step": 37070 + }, + { + "epoch": 1.58, + "learning_rate": 2.0949123557075675e-05, + "loss": 0.518, + "step": 37080 + }, + { + "epoch": 1.58, + "learning_rate": 2.092774690038478e-05, + "loss": 0.5284, + "step": 37090 + }, + { + "epoch": 1.58, + "learning_rate": 2.0906370243693888e-05, + "loss": 0.5517, + "step": 37100 + }, + { + "epoch": 1.58, + "learning_rate": 2.0884993587002992e-05, + "loss": 0.5121, + "step": 37110 + }, + { + "epoch": 1.58, + "learning_rate": 2.08636169303121e-05, + "loss": 0.5735, + "step": 37120 + }, + { + "epoch": 1.58, + "learning_rate": 2.0842240273621208e-05, + "loss": 0.5225, + "step": 37130 + }, + { + "epoch": 1.58, + "learning_rate": 2.0820863616930313e-05, + "loss": 0.5281, + "step": 37140 + }, + { + "epoch": 1.58, + "learning_rate": 2.079948696023942e-05, + "loss": 0.562, + "step": 37150 + }, + { + "epoch": 1.59, + "learning_rate": 2.0778110303548525e-05, + "loss": 0.5082, + "step": 37160 + }, + { + "epoch": 1.59, + "learning_rate": 2.0756733646857633e-05, + "loss": 0.564, + "step": 37170 + }, + { + "epoch": 1.59, + "learning_rate": 2.0735356990166737e-05, + "loss": 0.5248, + "step": 37180 + }, + { + "epoch": 1.59, + "learning_rate": 2.0713980333475845e-05, + "loss": 0.52, + "step": 37190 + }, + { + "epoch": 1.59, + "learning_rate": 2.0692603676784953e-05, + "loss": 0.5596, + "step": 37200 + }, + { + "epoch": 1.59, + "learning_rate": 2.0671227020094058e-05, + "loss": 0.5103, + "step": 37210 + }, + { + "epoch": 1.59, + "learning_rate": 2.0649850363403166e-05, + "loss": 0.5729, + "step": 37220 + }, + { + "epoch": 1.59, + "learning_rate": 2.062847370671227e-05, + "loss": 0.5222, + "step": 37230 + }, + { + "epoch": 1.59, + "learning_rate": 2.0607097050021378e-05, + "loss": 0.5309, + "step": 37240 + }, + { + "epoch": 1.59, + "learning_rate": 2.0585720393330486e-05, + "loss": 0.5519, + "step": 37250 + }, + { + "epoch": 1.59, + "learning_rate": 2.056434373663959e-05, + "loss": 0.5073, + "step": 37260 + }, + { + "epoch": 1.59, + "learning_rate": 2.0542967079948698e-05, + "loss": 0.5591, + "step": 37270 + }, + { + "epoch": 1.59, + "learning_rate": 2.0521590423257803e-05, + "loss": 0.5271, + "step": 37280 + }, + { + "epoch": 1.59, + "learning_rate": 2.0500213766566907e-05, + "loss": 0.5256, + "step": 37290 + }, + { + "epoch": 1.59, + "learning_rate": 2.047883710987602e-05, + "loss": 0.5645, + "step": 37300 + }, + { + "epoch": 1.59, + "learning_rate": 2.0457460453185123e-05, + "loss": 0.517, + "step": 37310 + }, + { + "epoch": 1.59, + "learning_rate": 2.0436083796494228e-05, + "loss": 0.5666, + "step": 37320 + }, + { + "epoch": 1.59, + "learning_rate": 2.0414707139803336e-05, + "loss": 0.5096, + "step": 37330 + }, + { + "epoch": 1.59, + "learning_rate": 2.039333048311244e-05, + "loss": 0.5355, + "step": 37340 + }, + { + "epoch": 1.59, + "learning_rate": 2.037195382642155e-05, + "loss": 0.5594, + "step": 37350 + }, + { + "epoch": 1.59, + "learning_rate": 2.0350577169730656e-05, + "loss": 0.503, + "step": 37360 + }, + { + "epoch": 1.59, + "learning_rate": 2.032920051303976e-05, + "loss": 0.5716, + "step": 37370 + }, + { + "epoch": 1.59, + "learning_rate": 2.0307823856348868e-05, + "loss": 0.5254, + "step": 37380 + }, + { + "epoch": 1.6, + "learning_rate": 2.0286447199657973e-05, + "loss": 0.5338, + "step": 37390 + }, + { + "epoch": 1.6, + "learning_rate": 2.026507054296708e-05, + "loss": 0.557, + "step": 37400 + }, + { + "epoch": 1.6, + "learning_rate": 2.024369388627619e-05, + "loss": 0.5114, + "step": 37410 + }, + { + "epoch": 1.6, + "learning_rate": 2.0222317229585293e-05, + "loss": 0.5682, + "step": 37420 + }, + { + "epoch": 1.6, + "learning_rate": 2.02009405728944e-05, + "loss": 0.5182, + "step": 37430 + }, + { + "epoch": 1.6, + "learning_rate": 2.0179563916203505e-05, + "loss": 0.5255, + "step": 37440 + }, + { + "epoch": 1.6, + "learning_rate": 2.0158187259512613e-05, + "loss": 0.5603, + "step": 37450 + }, + { + "epoch": 1.6, + "learning_rate": 2.013681060282172e-05, + "loss": 0.5068, + "step": 37460 + }, + { + "epoch": 1.6, + "learning_rate": 2.0115433946130826e-05, + "loss": 0.5704, + "step": 37470 + }, + { + "epoch": 1.6, + "learning_rate": 2.0094057289439934e-05, + "loss": 0.5153, + "step": 37480 + }, + { + "epoch": 1.6, + "learning_rate": 2.0072680632749038e-05, + "loss": 0.5272, + "step": 37490 + }, + { + "epoch": 1.6, + "learning_rate": 2.0051303976058146e-05, + "loss": 0.5513, + "step": 37500 + }, + { + "epoch": 1.6, + "learning_rate": 2.0029927319367254e-05, + "loss": 0.5076, + "step": 37510 + }, + { + "epoch": 1.6, + "learning_rate": 2.000855066267636e-05, + "loss": 0.5853, + "step": 37520 + }, + { + "epoch": 1.6, + "learning_rate": 1.9987174005985466e-05, + "loss": 0.5215, + "step": 37530 + }, + { + "epoch": 1.6, + "learning_rate": 1.996579734929457e-05, + "loss": 0.5277, + "step": 37540 + }, + { + "epoch": 1.6, + "learning_rate": 1.9944420692603675e-05, + "loss": 0.5589, + "step": 37550 + }, + { + "epoch": 1.6, + "learning_rate": 1.9923044035912783e-05, + "loss": 0.5087, + "step": 37560 + }, + { + "epoch": 1.6, + "learning_rate": 1.990166737922189e-05, + "loss": 0.5612, + "step": 37570 + }, + { + "epoch": 1.6, + "learning_rate": 1.9880290722531e-05, + "loss": 0.5153, + "step": 37580 + }, + { + "epoch": 1.6, + "learning_rate": 1.9858914065840104e-05, + "loss": 0.5333, + "step": 37590 + }, + { + "epoch": 1.6, + "learning_rate": 1.9837537409149208e-05, + "loss": 0.5608, + "step": 37600 + }, + { + "epoch": 1.6, + "learning_rate": 1.9816160752458316e-05, + "loss": 0.5017, + "step": 37610 + }, + { + "epoch": 1.6, + "learning_rate": 1.9794784095767424e-05, + "loss": 0.5673, + "step": 37620 + }, + { + "epoch": 1.61, + "learning_rate": 1.977340743907653e-05, + "loss": 0.5236, + "step": 37630 + }, + { + "epoch": 1.61, + "learning_rate": 1.9752030782385636e-05, + "loss": 0.5221, + "step": 37640 + }, + { + "epoch": 1.61, + "learning_rate": 1.973065412569474e-05, + "loss": 0.5548, + "step": 37650 + }, + { + "epoch": 1.61, + "learning_rate": 1.970927746900385e-05, + "loss": 0.5003, + "step": 37660 + }, + { + "epoch": 1.61, + "learning_rate": 1.9687900812312957e-05, + "loss": 0.5672, + "step": 37670 + }, + { + "epoch": 1.61, + "learning_rate": 1.966652415562206e-05, + "loss": 0.5219, + "step": 37680 + }, + { + "epoch": 1.61, + "learning_rate": 1.964514749893117e-05, + "loss": 0.5354, + "step": 37690 + }, + { + "epoch": 1.61, + "learning_rate": 1.9623770842240274e-05, + "loss": 0.5589, + "step": 37700 + }, + { + "epoch": 1.61, + "learning_rate": 1.960239418554938e-05, + "loss": 0.5161, + "step": 37710 + }, + { + "epoch": 1.61, + "learning_rate": 1.9581017528858486e-05, + "loss": 0.5671, + "step": 37720 + }, + { + "epoch": 1.61, + "learning_rate": 1.9559640872167594e-05, + "loss": 0.5205, + "step": 37730 + }, + { + "epoch": 1.61, + "learning_rate": 1.9538264215476702e-05, + "loss": 0.5299, + "step": 37740 + }, + { + "epoch": 1.61, + "learning_rate": 1.9516887558785806e-05, + "loss": 0.5636, + "step": 37750 + }, + { + "epoch": 1.61, + "learning_rate": 1.9495510902094914e-05, + "loss": 0.5004, + "step": 37760 + }, + { + "epoch": 1.61, + "learning_rate": 1.947413424540402e-05, + "loss": 0.5564, + "step": 37770 + }, + { + "epoch": 1.61, + "learning_rate": 1.9452757588713127e-05, + "loss": 0.5259, + "step": 37780 + }, + { + "epoch": 1.61, + "learning_rate": 1.9431380932022234e-05, + "loss": 0.5273, + "step": 37790 + }, + { + "epoch": 1.61, + "learning_rate": 1.941000427533134e-05, + "loss": 0.5627, + "step": 37800 + }, + { + "epoch": 1.61, + "learning_rate": 1.9388627618640447e-05, + "loss": 0.4996, + "step": 37810 + }, + { + "epoch": 1.61, + "learning_rate": 1.936725096194955e-05, + "loss": 0.5716, + "step": 37820 + }, + { + "epoch": 1.61, + "learning_rate": 1.9345874305258656e-05, + "loss": 0.5161, + "step": 37830 + }, + { + "epoch": 1.61, + "learning_rate": 1.9324497648567767e-05, + "loss": 0.5245, + "step": 37840 + }, + { + "epoch": 1.61, + "learning_rate": 1.9303120991876872e-05, + "loss": 0.5642, + "step": 37850 + }, + { + "epoch": 1.62, + "learning_rate": 1.9281744335185976e-05, + "loss": 0.5128, + "step": 37860 + }, + { + "epoch": 1.62, + "learning_rate": 1.9260367678495084e-05, + "loss": 0.5704, + "step": 37870 + }, + { + "epoch": 1.62, + "learning_rate": 1.923899102180419e-05, + "loss": 0.526, + "step": 37880 + }, + { + "epoch": 1.62, + "learning_rate": 1.92176143651133e-05, + "loss": 0.5337, + "step": 37890 + }, + { + "epoch": 1.62, + "learning_rate": 1.9196237708422404e-05, + "loss": 0.5522, + "step": 37900 + }, + { + "epoch": 1.62, + "learning_rate": 1.917486105173151e-05, + "loss": 0.508, + "step": 37910 + }, + { + "epoch": 1.62, + "learning_rate": 1.9153484395040617e-05, + "loss": 0.5649, + "step": 37920 + }, + { + "epoch": 1.62, + "learning_rate": 1.913210773834972e-05, + "loss": 0.5195, + "step": 37930 + }, + { + "epoch": 1.62, + "learning_rate": 1.911073108165883e-05, + "loss": 0.5259, + "step": 37940 + }, + { + "epoch": 1.62, + "learning_rate": 1.9089354424967937e-05, + "loss": 0.5615, + "step": 37950 + }, + { + "epoch": 1.62, + "learning_rate": 1.906797776827704e-05, + "loss": 0.5151, + "step": 37960 + }, + { + "epoch": 1.62, + "learning_rate": 1.904660111158615e-05, + "loss": 0.5667, + "step": 37970 + }, + { + "epoch": 1.62, + "learning_rate": 1.9025224454895254e-05, + "loss": 0.5242, + "step": 37980 + }, + { + "epoch": 1.62, + "learning_rate": 1.9003847798204362e-05, + "loss": 0.5269, + "step": 37990 + }, + { + "epoch": 1.62, + "learning_rate": 1.898247114151347e-05, + "loss": 0.549, + "step": 38000 + }, + { + "epoch": 1.62, + "learning_rate": 1.8961094484822574e-05, + "loss": 0.5042, + "step": 38010 + }, + { + "epoch": 1.62, + "learning_rate": 1.8939717828131682e-05, + "loss": 0.5665, + "step": 38020 + }, + { + "epoch": 1.62, + "learning_rate": 1.8918341171440787e-05, + "loss": 0.5068, + "step": 38030 + }, + { + "epoch": 1.62, + "learning_rate": 1.8896964514749895e-05, + "loss": 0.5193, + "step": 38040 + }, + { + "epoch": 1.62, + "learning_rate": 1.8875587858059003e-05, + "loss": 0.5512, + "step": 38050 + }, + { + "epoch": 1.62, + "learning_rate": 1.8854211201368107e-05, + "loss": 0.5185, + "step": 38060 + }, + { + "epoch": 1.62, + "learning_rate": 1.8832834544677215e-05, + "loss": 0.5581, + "step": 38070 + }, + { + "epoch": 1.62, + "learning_rate": 1.881145788798632e-05, + "loss": 0.5225, + "step": 38080 + }, + { + "epoch": 1.62, + "learning_rate": 1.8790081231295424e-05, + "loss": 0.5277, + "step": 38090 + }, + { + "epoch": 1.63, + "learning_rate": 1.8768704574604532e-05, + "loss": 0.5649, + "step": 38100 + }, + { + "epoch": 1.63, + "learning_rate": 1.874732791791364e-05, + "loss": 0.4986, + "step": 38110 + }, + { + "epoch": 1.63, + "learning_rate": 1.8725951261222748e-05, + "loss": 0.5674, + "step": 38120 + }, + { + "epoch": 1.63, + "learning_rate": 1.8704574604531852e-05, + "loss": 0.5197, + "step": 38130 + }, + { + "epoch": 1.63, + "learning_rate": 1.8683197947840957e-05, + "loss": 0.5308, + "step": 38140 + }, + { + "epoch": 1.63, + "learning_rate": 1.8661821291150065e-05, + "loss": 0.5578, + "step": 38150 + }, + { + "epoch": 1.63, + "learning_rate": 1.8640444634459173e-05, + "loss": 0.5063, + "step": 38160 + }, + { + "epoch": 1.63, + "learning_rate": 1.8619067977768277e-05, + "loss": 0.5711, + "step": 38170 + }, + { + "epoch": 1.63, + "learning_rate": 1.8597691321077385e-05, + "loss": 0.5165, + "step": 38180 + }, + { + "epoch": 1.63, + "learning_rate": 1.857631466438649e-05, + "loss": 0.5249, + "step": 38190 + }, + { + "epoch": 1.63, + "learning_rate": 1.8554938007695597e-05, + "loss": 0.5477, + "step": 38200 + }, + { + "epoch": 1.63, + "learning_rate": 1.8533561351004702e-05, + "loss": 0.51, + "step": 38210 + }, + { + "epoch": 1.63, + "learning_rate": 1.851218469431381e-05, + "loss": 0.5727, + "step": 38220 + }, + { + "epoch": 1.63, + "learning_rate": 1.8490808037622918e-05, + "loss": 0.513, + "step": 38230 + }, + { + "epoch": 1.63, + "learning_rate": 1.8469431380932022e-05, + "loss": 0.5305, + "step": 38240 + }, + { + "epoch": 1.63, + "learning_rate": 1.844805472424113e-05, + "loss": 0.5605, + "step": 38250 + }, + { + "epoch": 1.63, + "learning_rate": 1.8426678067550235e-05, + "loss": 0.5104, + "step": 38260 + }, + { + "epoch": 1.63, + "learning_rate": 1.8405301410859342e-05, + "loss": 0.5694, + "step": 38270 + }, + { + "epoch": 1.63, + "learning_rate": 1.838392475416845e-05, + "loss": 0.5148, + "step": 38280 + }, + { + "epoch": 1.63, + "learning_rate": 1.8362548097477555e-05, + "loss": 0.5296, + "step": 38290 + }, + { + "epoch": 1.63, + "learning_rate": 1.8341171440786663e-05, + "loss": 0.5514, + "step": 38300 + }, + { + "epoch": 1.63, + "learning_rate": 1.8319794784095767e-05, + "loss": 0.508, + "step": 38310 + }, + { + "epoch": 1.63, + "learning_rate": 1.8298418127404875e-05, + "loss": 0.5728, + "step": 38320 + }, + { + "epoch": 1.64, + "learning_rate": 1.8277041470713983e-05, + "loss": 0.5156, + "step": 38330 + }, + { + "epoch": 1.64, + "learning_rate": 1.8255664814023088e-05, + "loss": 0.5198, + "step": 38340 + }, + { + "epoch": 1.64, + "learning_rate": 1.8234288157332196e-05, + "loss": 0.5434, + "step": 38350 + }, + { + "epoch": 1.64, + "learning_rate": 1.82129115006413e-05, + "loss": 0.5131, + "step": 38360 + }, + { + "epoch": 1.64, + "learning_rate": 1.8191534843950405e-05, + "loss": 0.568, + "step": 38370 + }, + { + "epoch": 1.64, + "learning_rate": 1.8170158187259516e-05, + "loss": 0.5121, + "step": 38380 + }, + { + "epoch": 1.64, + "learning_rate": 1.814878153056862e-05, + "loss": 0.5255, + "step": 38390 + }, + { + "epoch": 1.64, + "learning_rate": 1.8127404873877725e-05, + "loss": 0.5673, + "step": 38400 + }, + { + "epoch": 1.64, + "learning_rate": 1.8106028217186833e-05, + "loss": 0.5118, + "step": 38410 + }, + { + "epoch": 1.64, + "learning_rate": 1.8084651560495937e-05, + "loss": 0.5715, + "step": 38420 + }, + { + "epoch": 1.64, + "learning_rate": 1.806327490380505e-05, + "loss": 0.5241, + "step": 38430 + }, + { + "epoch": 1.64, + "learning_rate": 1.8041898247114153e-05, + "loss": 0.5356, + "step": 38440 + }, + { + "epoch": 1.64, + "learning_rate": 1.8020521590423258e-05, + "loss": 0.5496, + "step": 38450 + }, + { + "epoch": 1.64, + "learning_rate": 1.7999144933732365e-05, + "loss": 0.5135, + "step": 38460 + }, + { + "epoch": 1.64, + "learning_rate": 1.797776827704147e-05, + "loss": 0.5605, + "step": 38470 + }, + { + "epoch": 1.64, + "learning_rate": 1.7956391620350578e-05, + "loss": 0.5171, + "step": 38480 + }, + { + "epoch": 1.64, + "learning_rate": 1.7935014963659686e-05, + "loss": 0.5216, + "step": 38490 + }, + { + "epoch": 1.64, + "learning_rate": 1.791363830696879e-05, + "loss": 0.5654, + "step": 38500 + }, + { + "epoch": 1.64, + "learning_rate": 1.7892261650277898e-05, + "loss": 0.5102, + "step": 38510 + }, + { + "epoch": 1.64, + "learning_rate": 1.7870884993587003e-05, + "loss": 0.5682, + "step": 38520 + }, + { + "epoch": 1.64, + "learning_rate": 1.784950833689611e-05, + "loss": 0.5161, + "step": 38530 + }, + { + "epoch": 1.64, + "learning_rate": 1.782813168020522e-05, + "loss": 0.5299, + "step": 38540 + }, + { + "epoch": 1.64, + "learning_rate": 1.7806755023514323e-05, + "loss": 0.5659, + "step": 38550 + }, + { + "epoch": 1.65, + "learning_rate": 1.778537836682343e-05, + "loss": 0.5038, + "step": 38560 + }, + { + "epoch": 1.65, + "learning_rate": 1.7764001710132535e-05, + "loss": 0.5591, + "step": 38570 + }, + { + "epoch": 1.65, + "learning_rate": 1.7742625053441643e-05, + "loss": 0.5197, + "step": 38580 + }, + { + "epoch": 1.65, + "learning_rate": 1.772124839675075e-05, + "loss": 0.5212, + "step": 38590 + }, + { + "epoch": 1.65, + "learning_rate": 1.7699871740059856e-05, + "loss": 0.5545, + "step": 38600 + }, + { + "epoch": 1.65, + "learning_rate": 1.7678495083368964e-05, + "loss": 0.5122, + "step": 38610 + }, + { + "epoch": 1.65, + "learning_rate": 1.7657118426678068e-05, + "loss": 0.5693, + "step": 38620 + }, + { + "epoch": 1.65, + "learning_rate": 1.7635741769987173e-05, + "loss": 0.5204, + "step": 38630 + }, + { + "epoch": 1.65, + "learning_rate": 1.761436511329628e-05, + "loss": 0.5229, + "step": 38640 + }, + { + "epoch": 1.65, + "learning_rate": 1.759298845660539e-05, + "loss": 0.5528, + "step": 38650 + }, + { + "epoch": 1.65, + "learning_rate": 1.7571611799914496e-05, + "loss": 0.5099, + "step": 38660 + }, + { + "epoch": 1.65, + "learning_rate": 1.75502351432236e-05, + "loss": 0.5809, + "step": 38670 + }, + { + "epoch": 1.65, + "learning_rate": 1.7528858486532705e-05, + "loss": 0.5174, + "step": 38680 + }, + { + "epoch": 1.65, + "learning_rate": 1.7507481829841813e-05, + "loss": 0.5295, + "step": 38690 + }, + { + "epoch": 1.65, + "learning_rate": 1.748610517315092e-05, + "loss": 0.5602, + "step": 38700 + }, + { + "epoch": 1.65, + "learning_rate": 1.7464728516460026e-05, + "loss": 0.5008, + "step": 38710 + }, + { + "epoch": 1.65, + "learning_rate": 1.7443351859769134e-05, + "loss": 0.5734, + "step": 38720 + }, + { + "epoch": 1.65, + "learning_rate": 1.7421975203078238e-05, + "loss": 0.5235, + "step": 38730 + }, + { + "epoch": 1.65, + "learning_rate": 1.7400598546387346e-05, + "loss": 0.5313, + "step": 38740 + }, + { + "epoch": 1.65, + "learning_rate": 1.737922188969645e-05, + "loss": 0.56, + "step": 38750 + }, + { + "epoch": 1.65, + "learning_rate": 1.735784523300556e-05, + "loss": 0.5049, + "step": 38760 + }, + { + "epoch": 1.65, + "learning_rate": 1.7336468576314666e-05, + "loss": 0.5697, + "step": 38770 + }, + { + "epoch": 1.65, + "learning_rate": 1.731509191962377e-05, + "loss": 0.52, + "step": 38780 + }, + { + "epoch": 1.65, + "learning_rate": 1.729371526293288e-05, + "loss": 0.5214, + "step": 38790 + }, + { + "epoch": 1.66, + "learning_rate": 1.7272338606241983e-05, + "loss": 0.5637, + "step": 38800 + }, + { + "epoch": 1.66, + "learning_rate": 1.725096194955109e-05, + "loss": 0.5022, + "step": 38810 + }, + { + "epoch": 1.66, + "learning_rate": 1.72295852928602e-05, + "loss": 0.5754, + "step": 38820 + }, + { + "epoch": 1.66, + "learning_rate": 1.7208208636169303e-05, + "loss": 0.518, + "step": 38830 + }, + { + "epoch": 1.66, + "learning_rate": 1.718683197947841e-05, + "loss": 0.5312, + "step": 38840 + }, + { + "epoch": 1.66, + "learning_rate": 1.7165455322787516e-05, + "loss": 0.5594, + "step": 38850 + }, + { + "epoch": 1.66, + "learning_rate": 1.7144078666096624e-05, + "loss": 0.5084, + "step": 38860 + }, + { + "epoch": 1.66, + "learning_rate": 1.7122702009405732e-05, + "loss": 0.5726, + "step": 38870 + }, + { + "epoch": 1.66, + "learning_rate": 1.7101325352714836e-05, + "loss": 0.5136, + "step": 38880 + }, + { + "epoch": 1.66, + "learning_rate": 1.7079948696023944e-05, + "loss": 0.5292, + "step": 38890 + }, + { + "epoch": 1.66, + "learning_rate": 1.705857203933305e-05, + "loss": 0.5546, + "step": 38900 + }, + { + "epoch": 1.66, + "learning_rate": 1.7037195382642153e-05, + "loss": 0.5019, + "step": 38910 + }, + { + "epoch": 1.66, + "learning_rate": 1.7015818725951264e-05, + "loss": 0.566, + "step": 38920 + }, + { + "epoch": 1.66, + "learning_rate": 1.699444206926037e-05, + "loss": 0.5118, + "step": 38930 + }, + { + "epoch": 1.66, + "learning_rate": 1.6973065412569473e-05, + "loss": 0.5285, + "step": 38940 + }, + { + "epoch": 1.66, + "learning_rate": 1.695168875587858e-05, + "loss": 0.558, + "step": 38950 + }, + { + "epoch": 1.66, + "learning_rate": 1.6930312099187686e-05, + "loss": 0.4989, + "step": 38960 + }, + { + "epoch": 1.66, + "learning_rate": 1.6908935442496797e-05, + "loss": 0.5581, + "step": 38970 + }, + { + "epoch": 1.66, + "learning_rate": 1.68875587858059e-05, + "loss": 0.5153, + "step": 38980 + }, + { + "epoch": 1.66, + "learning_rate": 1.6866182129115006e-05, + "loss": 0.53, + "step": 38990 + }, + { + "epoch": 1.66, + "learning_rate": 1.6844805472424114e-05, + "loss": 0.5501, + "step": 39000 + }, + { + "epoch": 1.66, + "learning_rate": 1.682342881573322e-05, + "loss": 0.5112, + "step": 39010 + }, + { + "epoch": 1.66, + "learning_rate": 1.6802052159042326e-05, + "loss": 0.5584, + "step": 39020 + }, + { + "epoch": 1.67, + "learning_rate": 1.6780675502351434e-05, + "loss": 0.5208, + "step": 39030 + }, + { + "epoch": 1.67, + "learning_rate": 1.675929884566054e-05, + "loss": 0.5123, + "step": 39040 + }, + { + "epoch": 1.67, + "learning_rate": 1.6737922188969647e-05, + "loss": 0.5547, + "step": 39050 + }, + { + "epoch": 1.67, + "learning_rate": 1.671654553227875e-05, + "loss": 0.5048, + "step": 39060 + }, + { + "epoch": 1.67, + "learning_rate": 1.669516887558786e-05, + "loss": 0.5687, + "step": 39070 + }, + { + "epoch": 1.67, + "learning_rate": 1.6673792218896967e-05, + "loss": 0.5187, + "step": 39080 + }, + { + "epoch": 1.67, + "learning_rate": 1.665241556220607e-05, + "loss": 0.5316, + "step": 39090 + }, + { + "epoch": 1.67, + "learning_rate": 1.663103890551518e-05, + "loss": 0.5485, + "step": 39100 + }, + { + "epoch": 1.67, + "learning_rate": 1.6609662248824284e-05, + "loss": 0.5114, + "step": 39110 + }, + { + "epoch": 1.67, + "learning_rate": 1.658828559213339e-05, + "loss": 0.5725, + "step": 39120 + }, + { + "epoch": 1.67, + "learning_rate": 1.65669089354425e-05, + "loss": 0.5265, + "step": 39130 + }, + { + "epoch": 1.67, + "learning_rate": 1.6545532278751604e-05, + "loss": 0.5188, + "step": 39140 + }, + { + "epoch": 1.67, + "learning_rate": 1.6524155622060712e-05, + "loss": 0.5499, + "step": 39150 + }, + { + "epoch": 1.67, + "learning_rate": 1.6502778965369817e-05, + "loss": 0.5027, + "step": 39160 + }, + { + "epoch": 1.67, + "learning_rate": 1.648140230867892e-05, + "loss": 0.5696, + "step": 39170 + }, + { + "epoch": 1.67, + "learning_rate": 1.646002565198803e-05, + "loss": 0.5243, + "step": 39180 + }, + { + "epoch": 1.67, + "learning_rate": 1.6438648995297137e-05, + "loss": 0.5253, + "step": 39190 + }, + { + "epoch": 1.67, + "learning_rate": 1.6417272338606245e-05, + "loss": 0.5628, + "step": 39200 + }, + { + "epoch": 1.67, + "learning_rate": 1.639589568191535e-05, + "loss": 0.5031, + "step": 39210 + }, + { + "epoch": 1.67, + "learning_rate": 1.6374519025224454e-05, + "loss": 0.5765, + "step": 39220 + }, + { + "epoch": 1.67, + "learning_rate": 1.6353142368533562e-05, + "loss": 0.5262, + "step": 39230 + }, + { + "epoch": 1.67, + "learning_rate": 1.633176571184267e-05, + "loss": 0.5338, + "step": 39240 + }, + { + "epoch": 1.67, + "learning_rate": 1.6310389055151774e-05, + "loss": 0.5531, + "step": 39250 + }, + { + "epoch": 1.67, + "learning_rate": 1.6289012398460882e-05, + "loss": 0.5043, + "step": 39260 + }, + { + "epoch": 1.68, + "learning_rate": 1.6267635741769987e-05, + "loss": 0.57, + "step": 39270 + }, + { + "epoch": 1.68, + "learning_rate": 1.6246259085079095e-05, + "loss": 0.5234, + "step": 39280 + }, + { + "epoch": 1.68, + "learning_rate": 1.62248824283882e-05, + "loss": 0.5198, + "step": 39290 + }, + { + "epoch": 1.68, + "learning_rate": 1.6203505771697307e-05, + "loss": 0.5573, + "step": 39300 + }, + { + "epoch": 1.68, + "learning_rate": 1.6182129115006415e-05, + "loss": 0.5148, + "step": 39310 + }, + { + "epoch": 1.68, + "learning_rate": 1.616075245831552e-05, + "loss": 0.5646, + "step": 39320 + }, + { + "epoch": 1.68, + "learning_rate": 1.6139375801624627e-05, + "loss": 0.5127, + "step": 39330 + }, + { + "epoch": 1.68, + "learning_rate": 1.6117999144933732e-05, + "loss": 0.5251, + "step": 39340 + }, + { + "epoch": 1.68, + "learning_rate": 1.609662248824284e-05, + "loss": 0.5552, + "step": 39350 + }, + { + "epoch": 1.68, + "learning_rate": 1.6075245831551948e-05, + "loss": 0.5125, + "step": 39360 + }, + { + "epoch": 1.68, + "learning_rate": 1.6053869174861052e-05, + "loss": 0.5655, + "step": 39370 + }, + { + "epoch": 1.68, + "learning_rate": 1.603249251817016e-05, + "loss": 0.5103, + "step": 39380 + }, + { + "epoch": 1.68, + "learning_rate": 1.6011115861479265e-05, + "loss": 0.5161, + "step": 39390 + }, + { + "epoch": 1.68, + "learning_rate": 1.5989739204788372e-05, + "loss": 0.5525, + "step": 39400 + }, + { + "epoch": 1.68, + "learning_rate": 1.596836254809748e-05, + "loss": 0.502, + "step": 39410 + }, + { + "epoch": 1.68, + "learning_rate": 1.5946985891406585e-05, + "loss": 0.5731, + "step": 39420 + }, + { + "epoch": 1.68, + "learning_rate": 1.592560923471569e-05, + "loss": 0.5189, + "step": 39430 + }, + { + "epoch": 1.68, + "learning_rate": 1.5904232578024797e-05, + "loss": 0.5223, + "step": 39440 + }, + { + "epoch": 1.68, + "learning_rate": 1.5882855921333902e-05, + "loss": 0.5464, + "step": 39450 + }, + { + "epoch": 1.68, + "learning_rate": 1.5861479264643013e-05, + "loss": 0.5001, + "step": 39460 + }, + { + "epoch": 1.68, + "learning_rate": 1.5840102607952118e-05, + "loss": 0.5768, + "step": 39470 + }, + { + "epoch": 1.68, + "learning_rate": 1.5818725951261222e-05, + "loss": 0.525, + "step": 39480 + }, + { + "epoch": 1.68, + "learning_rate": 1.579734929457033e-05, + "loss": 0.5232, + "step": 39490 + }, + { + "epoch": 1.69, + "learning_rate": 1.5775972637879434e-05, + "loss": 0.5565, + "step": 39500 + }, + { + "epoch": 1.69, + "learning_rate": 1.5754595981188546e-05, + "loss": 0.5087, + "step": 39510 + }, + { + "epoch": 1.69, + "learning_rate": 1.573321932449765e-05, + "loss": 0.5669, + "step": 39520 + }, + { + "epoch": 1.69, + "learning_rate": 1.5711842667806755e-05, + "loss": 0.5226, + "step": 39530 + }, + { + "epoch": 1.69, + "learning_rate": 1.5690466011115863e-05, + "loss": 0.5239, + "step": 39540 + }, + { + "epoch": 1.69, + "learning_rate": 1.5669089354424967e-05, + "loss": 0.5559, + "step": 39550 + }, + { + "epoch": 1.69, + "learning_rate": 1.5647712697734075e-05, + "loss": 0.5064, + "step": 39560 + }, + { + "epoch": 1.69, + "learning_rate": 1.5626336041043183e-05, + "loss": 0.5658, + "step": 39570 + }, + { + "epoch": 1.69, + "learning_rate": 1.5604959384352287e-05, + "loss": 0.5179, + "step": 39580 + }, + { + "epoch": 1.69, + "learning_rate": 1.5583582727661395e-05, + "loss": 0.5338, + "step": 39590 + }, + { + "epoch": 1.69, + "learning_rate": 1.55622060709705e-05, + "loss": 0.5614, + "step": 39600 + }, + { + "epoch": 1.69, + "learning_rate": 1.5540829414279608e-05, + "loss": 0.514, + "step": 39610 + }, + { + "epoch": 1.69, + "learning_rate": 1.5519452757588716e-05, + "loss": 0.5565, + "step": 39620 + }, + { + "epoch": 1.69, + "learning_rate": 1.549807610089782e-05, + "loss": 0.5097, + "step": 39630 + }, + { + "epoch": 1.69, + "learning_rate": 1.5476699444206928e-05, + "loss": 0.521, + "step": 39640 + }, + { + "epoch": 1.69, + "learning_rate": 1.5455322787516033e-05, + "loss": 0.5607, + "step": 39650 + }, + { + "epoch": 1.69, + "learning_rate": 1.5433946130825137e-05, + "loss": 0.5021, + "step": 39660 + }, + { + "epoch": 1.69, + "learning_rate": 1.541256947413425e-05, + "loss": 0.5605, + "step": 39670 + }, + { + "epoch": 1.69, + "learning_rate": 1.5391192817443353e-05, + "loss": 0.515, + "step": 39680 + }, + { + "epoch": 1.69, + "learning_rate": 1.536981616075246e-05, + "loss": 0.5225, + "step": 39690 + }, + { + "epoch": 1.69, + "learning_rate": 1.5348439504061565e-05, + "loss": 0.5504, + "step": 39700 + }, + { + "epoch": 1.69, + "learning_rate": 1.532706284737067e-05, + "loss": 0.5155, + "step": 39710 + }, + { + "epoch": 1.69, + "learning_rate": 1.5305686190679778e-05, + "loss": 0.5586, + "step": 39720 + }, + { + "epoch": 1.69, + "learning_rate": 1.5284309533988886e-05, + "loss": 0.5223, + "step": 39730 + }, + { + "epoch": 1.7, + "learning_rate": 1.526293287729799e-05, + "loss": 0.5211, + "step": 39740 + }, + { + "epoch": 1.7, + "learning_rate": 1.5241556220607098e-05, + "loss": 0.5536, + "step": 39750 + }, + { + "epoch": 1.7, + "learning_rate": 1.5220179563916204e-05, + "loss": 0.5054, + "step": 39760 + }, + { + "epoch": 1.7, + "learning_rate": 1.519880290722531e-05, + "loss": 0.5726, + "step": 39770 + }, + { + "epoch": 1.7, + "learning_rate": 1.5177426250534418e-05, + "loss": 0.5204, + "step": 39780 + }, + { + "epoch": 1.7, + "learning_rate": 1.5156049593843525e-05, + "loss": 0.5305, + "step": 39790 + }, + { + "epoch": 1.7, + "learning_rate": 1.513467293715263e-05, + "loss": 0.5532, + "step": 39800 + }, + { + "epoch": 1.7, + "learning_rate": 1.5113296280461737e-05, + "loss": 0.5084, + "step": 39810 + }, + { + "epoch": 1.7, + "learning_rate": 1.5091919623770841e-05, + "loss": 0.5689, + "step": 39820 + }, + { + "epoch": 1.7, + "learning_rate": 1.5070542967079948e-05, + "loss": 0.5179, + "step": 39830 + }, + { + "epoch": 1.7, + "learning_rate": 1.5049166310389057e-05, + "loss": 0.5303, + "step": 39840 + }, + { + "epoch": 1.7, + "learning_rate": 1.5027789653698163e-05, + "loss": 0.5508, + "step": 39850 + }, + { + "epoch": 1.7, + "learning_rate": 1.5006412997007268e-05, + "loss": 0.4999, + "step": 39860 + }, + { + "epoch": 1.7, + "learning_rate": 1.4985036340316374e-05, + "loss": 0.5794, + "step": 39870 + }, + { + "epoch": 1.7, + "learning_rate": 1.496365968362548e-05, + "loss": 0.5159, + "step": 39880 + }, + { + "epoch": 1.7, + "learning_rate": 1.494228302693459e-05, + "loss": 0.5263, + "step": 39890 + }, + { + "epoch": 1.7, + "learning_rate": 1.4920906370243695e-05, + "loss": 0.5502, + "step": 39900 + }, + { + "epoch": 1.7, + "learning_rate": 1.48995297135528e-05, + "loss": 0.5047, + "step": 39910 + }, + { + "epoch": 1.7, + "learning_rate": 1.4878153056861907e-05, + "loss": 0.5581, + "step": 39920 + }, + { + "epoch": 1.7, + "learning_rate": 1.4856776400171013e-05, + "loss": 0.5193, + "step": 39930 + }, + { + "epoch": 1.7, + "learning_rate": 1.4835399743480121e-05, + "loss": 0.5291, + "step": 39940 + }, + { + "epoch": 1.7, + "learning_rate": 1.4814023086789227e-05, + "loss": 0.5464, + "step": 39950 + }, + { + "epoch": 1.7, + "learning_rate": 1.4792646430098333e-05, + "loss": 0.4991, + "step": 39960 + }, + { + "epoch": 1.71, + "learning_rate": 1.477126977340744e-05, + "loss": 0.5594, + "step": 39970 + }, + { + "epoch": 1.71, + "learning_rate": 1.4749893116716546e-05, + "loss": 0.5084, + "step": 39980 + }, + { + "epoch": 1.71, + "learning_rate": 1.4728516460025652e-05, + "loss": 0.5331, + "step": 39990 + }, + { + "epoch": 1.71, + "learning_rate": 1.470713980333476e-05, + "loss": 0.5491, + "step": 40000 + }, + { + "epoch": 1.71, + "learning_rate": 1.4685763146643866e-05, + "loss": 0.5045, + "step": 40010 + }, + { + "epoch": 1.71, + "learning_rate": 1.4664386489952972e-05, + "loss": 0.5638, + "step": 40020 + }, + { + "epoch": 1.71, + "learning_rate": 1.4643009833262079e-05, + "loss": 0.5124, + "step": 40030 + }, + { + "epoch": 1.71, + "learning_rate": 1.4621633176571183e-05, + "loss": 0.525, + "step": 40040 + }, + { + "epoch": 1.71, + "learning_rate": 1.4600256519880293e-05, + "loss": 0.555, + "step": 40050 + }, + { + "epoch": 1.71, + "learning_rate": 1.4578879863189399e-05, + "loss": 0.5053, + "step": 40060 + }, + { + "epoch": 1.71, + "learning_rate": 1.4557503206498505e-05, + "loss": 0.5617, + "step": 40070 + }, + { + "epoch": 1.71, + "learning_rate": 1.4536126549807611e-05, + "loss": 0.53, + "step": 40080 + }, + { + "epoch": 1.71, + "learning_rate": 1.4514749893116716e-05, + "loss": 0.5287, + "step": 40090 + }, + { + "epoch": 1.71, + "learning_rate": 1.4493373236425822e-05, + "loss": 0.5579, + "step": 40100 + }, + { + "epoch": 1.71, + "learning_rate": 1.4471996579734932e-05, + "loss": 0.4981, + "step": 40110 + }, + { + "epoch": 1.71, + "learning_rate": 1.4450619923044038e-05, + "loss": 0.5615, + "step": 40120 + }, + { + "epoch": 1.71, + "learning_rate": 1.4429243266353142e-05, + "loss": 0.5165, + "step": 40130 + }, + { + "epoch": 1.71, + "learning_rate": 1.4407866609662249e-05, + "loss": 0.5177, + "step": 40140 + }, + { + "epoch": 1.71, + "learning_rate": 1.4386489952971355e-05, + "loss": 0.5567, + "step": 40150 + }, + { + "epoch": 1.71, + "learning_rate": 1.4365113296280464e-05, + "loss": 0.4999, + "step": 40160 + }, + { + "epoch": 1.71, + "learning_rate": 1.4343736639589569e-05, + "loss": 0.5627, + "step": 40170 + }, + { + "epoch": 1.71, + "learning_rate": 1.4322359982898675e-05, + "loss": 0.5102, + "step": 40180 + }, + { + "epoch": 1.71, + "learning_rate": 1.4300983326207781e-05, + "loss": 0.5206, + "step": 40190 + }, + { + "epoch": 1.71, + "learning_rate": 1.4279606669516887e-05, + "loss": 0.5538, + "step": 40200 + }, + { + "epoch": 1.72, + "learning_rate": 1.4258230012825995e-05, + "loss": 0.509, + "step": 40210 + }, + { + "epoch": 1.72, + "learning_rate": 1.4236853356135102e-05, + "loss": 0.5642, + "step": 40220 + }, + { + "epoch": 1.72, + "learning_rate": 1.4215476699444208e-05, + "loss": 0.5163, + "step": 40230 + }, + { + "epoch": 1.72, + "learning_rate": 1.4194100042753314e-05, + "loss": 0.5198, + "step": 40240 + }, + { + "epoch": 1.72, + "learning_rate": 1.417272338606242e-05, + "loss": 0.5468, + "step": 40250 + }, + { + "epoch": 1.72, + "learning_rate": 1.4151346729371526e-05, + "loss": 0.502, + "step": 40260 + }, + { + "epoch": 1.72, + "learning_rate": 1.4129970072680634e-05, + "loss": 0.5645, + "step": 40270 + }, + { + "epoch": 1.72, + "learning_rate": 1.410859341598974e-05, + "loss": 0.5204, + "step": 40280 + }, + { + "epoch": 1.72, + "learning_rate": 1.4087216759298847e-05, + "loss": 0.5252, + "step": 40290 + }, + { + "epoch": 1.72, + "learning_rate": 1.4065840102607953e-05, + "loss": 0.5529, + "step": 40300 + }, + { + "epoch": 1.72, + "learning_rate": 1.4044463445917057e-05, + "loss": 0.5043, + "step": 40310 + }, + { + "epoch": 1.72, + "learning_rate": 1.4023086789226167e-05, + "loss": 0.5661, + "step": 40320 + }, + { + "epoch": 1.72, + "learning_rate": 1.4001710132535273e-05, + "loss": 0.5141, + "step": 40330 + }, + { + "epoch": 1.72, + "learning_rate": 1.398033347584438e-05, + "loss": 0.5242, + "step": 40340 + }, + { + "epoch": 1.72, + "learning_rate": 1.3958956819153484e-05, + "loss": 0.5575, + "step": 40350 + }, + { + "epoch": 1.72, + "learning_rate": 1.393758016246259e-05, + "loss": 0.4986, + "step": 40360 + }, + { + "epoch": 1.72, + "learning_rate": 1.3916203505771696e-05, + "loss": 0.5641, + "step": 40370 + }, + { + "epoch": 1.72, + "learning_rate": 1.3894826849080806e-05, + "loss": 0.5157, + "step": 40380 + }, + { + "epoch": 1.72, + "learning_rate": 1.3873450192389912e-05, + "loss": 0.5217, + "step": 40390 + }, + { + "epoch": 1.72, + "learning_rate": 1.3852073535699017e-05, + "loss": 0.5534, + "step": 40400 + }, + { + "epoch": 1.72, + "learning_rate": 1.3830696879008123e-05, + "loss": 0.5028, + "step": 40410 + }, + { + "epoch": 1.72, + "learning_rate": 1.3809320222317229e-05, + "loss": 0.5713, + "step": 40420 + }, + { + "epoch": 1.72, + "learning_rate": 1.3787943565626339e-05, + "loss": 0.5282, + "step": 40430 + }, + { + "epoch": 1.73, + "learning_rate": 1.3766566908935443e-05, + "loss": 0.5118, + "step": 40440 + }, + { + "epoch": 1.73, + "learning_rate": 1.374519025224455e-05, + "loss": 0.5586, + "step": 40450 + }, + { + "epoch": 1.73, + "learning_rate": 1.3723813595553656e-05, + "loss": 0.5132, + "step": 40460 + }, + { + "epoch": 1.73, + "learning_rate": 1.3702436938862762e-05, + "loss": 0.5677, + "step": 40470 + }, + { + "epoch": 1.73, + "learning_rate": 1.368106028217187e-05, + "loss": 0.5162, + "step": 40480 + }, + { + "epoch": 1.73, + "learning_rate": 1.3659683625480976e-05, + "loss": 0.5248, + "step": 40490 + }, + { + "epoch": 1.73, + "learning_rate": 1.3638306968790082e-05, + "loss": 0.5554, + "step": 40500 + }, + { + "epoch": 1.73, + "learning_rate": 1.3616930312099188e-05, + "loss": 0.5069, + "step": 40510 + }, + { + "epoch": 1.73, + "learning_rate": 1.3595553655408294e-05, + "loss": 0.5715, + "step": 40520 + }, + { + "epoch": 1.73, + "learning_rate": 1.35741769987174e-05, + "loss": 0.5124, + "step": 40530 + }, + { + "epoch": 1.73, + "learning_rate": 1.3552800342026509e-05, + "loss": 0.5219, + "step": 40540 + }, + { + "epoch": 1.73, + "learning_rate": 1.3531423685335615e-05, + "loss": 0.5411, + "step": 40550 + }, + { + "epoch": 1.73, + "learning_rate": 1.3510047028644721e-05, + "loss": 0.5052, + "step": 40560 + }, + { + "epoch": 1.73, + "learning_rate": 1.3488670371953827e-05, + "loss": 0.5621, + "step": 40570 + }, + { + "epoch": 1.73, + "learning_rate": 1.3467293715262932e-05, + "loss": 0.5124, + "step": 40580 + }, + { + "epoch": 1.73, + "learning_rate": 1.3445917058572041e-05, + "loss": 0.5134, + "step": 40590 + }, + { + "epoch": 1.73, + "learning_rate": 1.3424540401881147e-05, + "loss": 0.5494, + "step": 40600 + }, + { + "epoch": 1.73, + "learning_rate": 1.3403163745190254e-05, + "loss": 0.5032, + "step": 40610 + }, + { + "epoch": 1.73, + "learning_rate": 1.3381787088499358e-05, + "loss": 0.566, + "step": 40620 + }, + { + "epoch": 1.73, + "learning_rate": 1.3360410431808464e-05, + "loss": 0.5154, + "step": 40630 + }, + { + "epoch": 1.73, + "learning_rate": 1.333903377511757e-05, + "loss": 0.5252, + "step": 40640 + }, + { + "epoch": 1.73, + "learning_rate": 1.331765711842668e-05, + "loss": 0.556, + "step": 40650 + }, + { + "epoch": 1.73, + "learning_rate": 1.3296280461735785e-05, + "loss": 0.5067, + "step": 40660 + }, + { + "epoch": 1.74, + "learning_rate": 1.3274903805044891e-05, + "loss": 0.5683, + "step": 40670 + }, + { + "epoch": 1.74, + "learning_rate": 1.3253527148353997e-05, + "loss": 0.5116, + "step": 40680 + }, + { + "epoch": 1.74, + "learning_rate": 1.3232150491663103e-05, + "loss": 0.5171, + "step": 40690 + }, + { + "epoch": 1.74, + "learning_rate": 1.3210773834972213e-05, + "loss": 0.5522, + "step": 40700 + }, + { + "epoch": 1.74, + "learning_rate": 1.3189397178281317e-05, + "loss": 0.5101, + "step": 40710 + }, + { + "epoch": 1.74, + "learning_rate": 1.3168020521590424e-05, + "loss": 0.5683, + "step": 40720 + }, + { + "epoch": 1.74, + "learning_rate": 1.314664386489953e-05, + "loss": 0.5176, + "step": 40730 + }, + { + "epoch": 1.74, + "learning_rate": 1.3125267208208636e-05, + "loss": 0.5228, + "step": 40740 + }, + { + "epoch": 1.74, + "learning_rate": 1.3103890551517744e-05, + "loss": 0.5439, + "step": 40750 + }, + { + "epoch": 1.74, + "learning_rate": 1.308251389482685e-05, + "loss": 0.507, + "step": 40760 + }, + { + "epoch": 1.74, + "learning_rate": 1.3061137238135956e-05, + "loss": 0.5665, + "step": 40770 + }, + { + "epoch": 1.74, + "learning_rate": 1.3039760581445063e-05, + "loss": 0.5112, + "step": 40780 + }, + { + "epoch": 1.74, + "learning_rate": 1.3018383924754169e-05, + "loss": 0.5214, + "step": 40790 + }, + { + "epoch": 1.74, + "learning_rate": 1.2997007268063275e-05, + "loss": 0.5534, + "step": 40800 + }, + { + "epoch": 1.74, + "learning_rate": 1.2975630611372383e-05, + "loss": 0.5018, + "step": 40810 + }, + { + "epoch": 1.74, + "learning_rate": 1.2954253954681489e-05, + "loss": 0.5679, + "step": 40820 + }, + { + "epoch": 1.74, + "learning_rate": 1.2932877297990595e-05, + "loss": 0.5183, + "step": 40830 + }, + { + "epoch": 1.74, + "learning_rate": 1.2911500641299701e-05, + "loss": 0.5148, + "step": 40840 + }, + { + "epoch": 1.74, + "learning_rate": 1.2890123984608806e-05, + "loss": 0.5451, + "step": 40850 + }, + { + "epoch": 1.74, + "learning_rate": 1.2868747327917916e-05, + "loss": 0.5003, + "step": 40860 + }, + { + "epoch": 1.74, + "learning_rate": 1.2847370671227022e-05, + "loss": 0.5627, + "step": 40870 + }, + { + "epoch": 1.74, + "learning_rate": 1.2825994014536128e-05, + "loss": 0.5117, + "step": 40880 + }, + { + "epoch": 1.74, + "learning_rate": 1.2804617357845233e-05, + "loss": 0.5263, + "step": 40890 + }, + { + "epoch": 1.74, + "learning_rate": 1.2783240701154339e-05, + "loss": 0.5484, + "step": 40900 + }, + { + "epoch": 1.75, + "learning_rate": 1.2761864044463445e-05, + "loss": 0.5056, + "step": 40910 + }, + { + "epoch": 1.75, + "learning_rate": 1.2740487387772555e-05, + "loss": 0.558, + "step": 40920 + }, + { + "epoch": 1.75, + "learning_rate": 1.2719110731081659e-05, + "loss": 0.5204, + "step": 40930 + }, + { + "epoch": 1.75, + "learning_rate": 1.2697734074390765e-05, + "loss": 0.5221, + "step": 40940 + }, + { + "epoch": 1.75, + "learning_rate": 1.2676357417699871e-05, + "loss": 0.5537, + "step": 40950 + }, + { + "epoch": 1.75, + "learning_rate": 1.2654980761008978e-05, + "loss": 0.5114, + "step": 40960 + }, + { + "epoch": 1.75, + "learning_rate": 1.2633604104318086e-05, + "loss": 0.5598, + "step": 40970 + }, + { + "epoch": 1.75, + "learning_rate": 1.2612227447627192e-05, + "loss": 0.5174, + "step": 40980 + }, + { + "epoch": 1.75, + "learning_rate": 1.2590850790936298e-05, + "loss": 0.5196, + "step": 40990 + }, + { + "epoch": 1.75, + "learning_rate": 1.2569474134245404e-05, + "loss": 0.5483, + "step": 41000 + }, + { + "epoch": 1.75, + "learning_rate": 1.254809747755451e-05, + "loss": 0.5085, + "step": 41010 + }, + { + "epoch": 1.75, + "learning_rate": 1.2526720820863618e-05, + "loss": 0.5646, + "step": 41020 + }, + { + "epoch": 1.75, + "learning_rate": 1.2505344164172724e-05, + "loss": 0.5127, + "step": 41030 + }, + { + "epoch": 1.75, + "learning_rate": 1.248396750748183e-05, + "loss": 0.5171, + "step": 41040 + }, + { + "epoch": 1.75, + "learning_rate": 1.2462590850790937e-05, + "loss": 0.5522, + "step": 41050 + }, + { + "epoch": 1.75, + "learning_rate": 1.2441214194100045e-05, + "loss": 0.5076, + "step": 41060 + }, + { + "epoch": 1.75, + "learning_rate": 1.241983753740915e-05, + "loss": 0.5602, + "step": 41070 + }, + { + "epoch": 1.75, + "learning_rate": 1.2398460880718255e-05, + "loss": 0.513, + "step": 41080 + }, + { + "epoch": 1.75, + "learning_rate": 1.2377084224027363e-05, + "loss": 0.5224, + "step": 41090 + }, + { + "epoch": 1.75, + "learning_rate": 1.235570756733647e-05, + "loss": 0.5429, + "step": 41100 + }, + { + "epoch": 1.75, + "learning_rate": 1.2334330910645576e-05, + "loss": 0.5042, + "step": 41110 + }, + { + "epoch": 1.75, + "learning_rate": 1.2312954253954682e-05, + "loss": 0.5733, + "step": 41120 + }, + { + "epoch": 1.75, + "learning_rate": 1.2291577597263788e-05, + "loss": 0.5147, + "step": 41130 + }, + { + "epoch": 1.76, + "learning_rate": 1.2270200940572894e-05, + "loss": 0.5268, + "step": 41140 + }, + { + "epoch": 1.76, + "learning_rate": 1.2248824283882002e-05, + "loss": 0.5561, + "step": 41150 + }, + { + "epoch": 1.76, + "learning_rate": 1.2227447627191107e-05, + "loss": 0.5125, + "step": 41160 + }, + { + "epoch": 1.76, + "learning_rate": 1.2206070970500215e-05, + "loss": 0.5639, + "step": 41170 + }, + { + "epoch": 1.76, + "learning_rate": 1.2184694313809321e-05, + "loss": 0.5106, + "step": 41180 + }, + { + "epoch": 1.76, + "learning_rate": 1.2163317657118427e-05, + "loss": 0.5236, + "step": 41190 + }, + { + "epoch": 1.76, + "learning_rate": 1.2141941000427533e-05, + "loss": 0.5443, + "step": 41200 + }, + { + "epoch": 1.76, + "learning_rate": 1.212056434373664e-05, + "loss": 0.5085, + "step": 41210 + }, + { + "epoch": 1.76, + "learning_rate": 1.2099187687045746e-05, + "loss": 0.5681, + "step": 41220 + }, + { + "epoch": 1.76, + "learning_rate": 1.2077811030354854e-05, + "loss": 0.5077, + "step": 41230 + }, + { + "epoch": 1.76, + "learning_rate": 1.205643437366396e-05, + "loss": 0.5215, + "step": 41240 + }, + { + "epoch": 1.76, + "learning_rate": 1.2035057716973066e-05, + "loss": 0.5513, + "step": 41250 + }, + { + "epoch": 1.76, + "learning_rate": 1.2013681060282172e-05, + "loss": 0.5034, + "step": 41260 + }, + { + "epoch": 1.76, + "learning_rate": 1.1992304403591278e-05, + "loss": 0.5688, + "step": 41270 + }, + { + "epoch": 1.76, + "learning_rate": 1.1970927746900386e-05, + "loss": 0.5171, + "step": 41280 + }, + { + "epoch": 1.76, + "learning_rate": 1.1949551090209493e-05, + "loss": 0.525, + "step": 41290 + }, + { + "epoch": 1.76, + "learning_rate": 1.1928174433518597e-05, + "loss": 0.5471, + "step": 41300 + }, + { + "epoch": 1.76, + "learning_rate": 1.1906797776827705e-05, + "loss": 0.5052, + "step": 41310 + }, + { + "epoch": 1.76, + "learning_rate": 1.1885421120136811e-05, + "loss": 0.5683, + "step": 41320 + }, + { + "epoch": 1.76, + "learning_rate": 1.1864044463445919e-05, + "loss": 0.5166, + "step": 41330 + }, + { + "epoch": 1.76, + "learning_rate": 1.1842667806755024e-05, + "loss": 0.5127, + "step": 41340 + }, + { + "epoch": 1.76, + "learning_rate": 1.182129115006413e-05, + "loss": 0.5636, + "step": 41350 + }, + { + "epoch": 1.76, + "learning_rate": 1.1799914493373238e-05, + "loss": 0.5107, + "step": 41360 + }, + { + "epoch": 1.76, + "learning_rate": 1.1778537836682344e-05, + "loss": 0.5588, + "step": 41370 + }, + { + "epoch": 1.77, + "learning_rate": 1.175716117999145e-05, + "loss": 0.5127, + "step": 41380 + }, + { + "epoch": 1.77, + "learning_rate": 1.1735784523300556e-05, + "loss": 0.5225, + "step": 41390 + }, + { + "epoch": 1.77, + "learning_rate": 1.1714407866609663e-05, + "loss": 0.5463, + "step": 41400 + }, + { + "epoch": 1.77, + "learning_rate": 1.1693031209918769e-05, + "loss": 0.5081, + "step": 41410 + }, + { + "epoch": 1.77, + "learning_rate": 1.1671654553227877e-05, + "loss": 0.5612, + "step": 41420 + }, + { + "epoch": 1.77, + "learning_rate": 1.1650277896536981e-05, + "loss": 0.5154, + "step": 41430 + }, + { + "epoch": 1.77, + "learning_rate": 1.1628901239846089e-05, + "loss": 0.5222, + "step": 41440 + }, + { + "epoch": 1.77, + "learning_rate": 1.1607524583155195e-05, + "loss": 0.5473, + "step": 41450 + }, + { + "epoch": 1.77, + "learning_rate": 1.1586147926464301e-05, + "loss": 0.4975, + "step": 41460 + }, + { + "epoch": 1.77, + "learning_rate": 1.1564771269773408e-05, + "loss": 0.5628, + "step": 41470 + }, + { + "epoch": 1.77, + "learning_rate": 1.1543394613082514e-05, + "loss": 0.5163, + "step": 41480 + }, + { + "epoch": 1.77, + "learning_rate": 1.152201795639162e-05, + "loss": 0.5241, + "step": 41490 + }, + { + "epoch": 1.77, + "learning_rate": 1.1500641299700728e-05, + "loss": 0.5412, + "step": 41500 + }, + { + "epoch": 1.77, + "learning_rate": 1.1479264643009834e-05, + "loss": 0.5064, + "step": 41510 + }, + { + "epoch": 1.77, + "learning_rate": 1.145788798631894e-05, + "loss": 0.5605, + "step": 41520 + }, + { + "epoch": 1.77, + "learning_rate": 1.1436511329628047e-05, + "loss": 0.5161, + "step": 41530 + }, + { + "epoch": 1.77, + "learning_rate": 1.1415134672937153e-05, + "loss": 0.5293, + "step": 41540 + }, + { + "epoch": 1.77, + "learning_rate": 1.139375801624626e-05, + "loss": 0.548, + "step": 41550 + }, + { + "epoch": 1.77, + "learning_rate": 1.1372381359555367e-05, + "loss": 0.505, + "step": 41560 + }, + { + "epoch": 1.77, + "learning_rate": 1.1351004702864471e-05, + "loss": 0.565, + "step": 41570 + }, + { + "epoch": 1.77, + "learning_rate": 1.132962804617358e-05, + "loss": 0.5183, + "step": 41580 + }, + { + "epoch": 1.77, + "learning_rate": 1.1308251389482685e-05, + "loss": 0.5249, + "step": 41590 + }, + { + "epoch": 1.77, + "learning_rate": 1.1286874732791793e-05, + "loss": 0.5464, + "step": 41600 + }, + { + "epoch": 1.78, + "learning_rate": 1.1265498076100898e-05, + "loss": 0.5033, + "step": 41610 + }, + { + "epoch": 1.78, + "learning_rate": 1.1244121419410004e-05, + "loss": 0.5585, + "step": 41620 + }, + { + "epoch": 1.78, + "learning_rate": 1.1222744762719112e-05, + "loss": 0.52, + "step": 41630 + }, + { + "epoch": 1.78, + "learning_rate": 1.1201368106028218e-05, + "loss": 0.5215, + "step": 41640 + }, + { + "epoch": 1.78, + "learning_rate": 1.1179991449337324e-05, + "loss": 0.546, + "step": 41650 + }, + { + "epoch": 1.78, + "learning_rate": 1.115861479264643e-05, + "loss": 0.5047, + "step": 41660 + }, + { + "epoch": 1.78, + "learning_rate": 1.1137238135955537e-05, + "loss": 0.5565, + "step": 41670 + }, + { + "epoch": 1.78, + "learning_rate": 1.1115861479264643e-05, + "loss": 0.5122, + "step": 41680 + }, + { + "epoch": 1.78, + "learning_rate": 1.1094484822573751e-05, + "loss": 0.5235, + "step": 41690 + }, + { + "epoch": 1.78, + "learning_rate": 1.1073108165882855e-05, + "loss": 0.5528, + "step": 41700 + }, + { + "epoch": 1.78, + "learning_rate": 1.1051731509191963e-05, + "loss": 0.5108, + "step": 41710 + }, + { + "epoch": 1.78, + "learning_rate": 1.103035485250107e-05, + "loss": 0.5608, + "step": 41720 + }, + { + "epoch": 1.78, + "learning_rate": 1.1008978195810176e-05, + "loss": 0.5076, + "step": 41730 + }, + { + "epoch": 1.78, + "learning_rate": 1.0987601539119282e-05, + "loss": 0.5264, + "step": 41740 + }, + { + "epoch": 1.78, + "learning_rate": 1.0966224882428388e-05, + "loss": 0.5519, + "step": 41750 + }, + { + "epoch": 1.78, + "learning_rate": 1.0944848225737494e-05, + "loss": 0.5026, + "step": 41760 + }, + { + "epoch": 1.78, + "learning_rate": 1.0923471569046602e-05, + "loss": 0.5612, + "step": 41770 + }, + { + "epoch": 1.78, + "learning_rate": 1.0902094912355708e-05, + "loss": 0.5182, + "step": 41780 + }, + { + "epoch": 1.78, + "learning_rate": 1.0880718255664815e-05, + "loss": 0.5251, + "step": 41790 + }, + { + "epoch": 1.78, + "learning_rate": 1.0859341598973921e-05, + "loss": 0.5528, + "step": 41800 + }, + { + "epoch": 1.78, + "learning_rate": 1.0837964942283027e-05, + "loss": 0.5013, + "step": 41810 + }, + { + "epoch": 1.78, + "learning_rate": 1.0816588285592135e-05, + "loss": 0.5561, + "step": 41820 + }, + { + "epoch": 1.78, + "learning_rate": 1.079521162890124e-05, + "loss": 0.5142, + "step": 41830 + }, + { + "epoch": 1.78, + "learning_rate": 1.0773834972210346e-05, + "loss": 0.5273, + "step": 41840 + }, + { + "epoch": 1.79, + "learning_rate": 1.0752458315519454e-05, + "loss": 0.5565, + "step": 41850 + }, + { + "epoch": 1.79, + "learning_rate": 1.073108165882856e-05, + "loss": 0.5017, + "step": 41860 + }, + { + "epoch": 1.79, + "learning_rate": 1.0709705002137668e-05, + "loss": 0.5726, + "step": 41870 + }, + { + "epoch": 1.79, + "learning_rate": 1.0688328345446772e-05, + "loss": 0.5096, + "step": 41880 + }, + { + "epoch": 1.79, + "learning_rate": 1.0666951688755878e-05, + "loss": 0.5205, + "step": 41890 + }, + { + "epoch": 1.79, + "learning_rate": 1.0645575032064986e-05, + "loss": 0.5508, + "step": 41900 + }, + { + "epoch": 1.79, + "learning_rate": 1.0624198375374093e-05, + "loss": 0.5005, + "step": 41910 + }, + { + "epoch": 1.79, + "learning_rate": 1.0602821718683199e-05, + "loss": 0.5657, + "step": 41920 + }, + { + "epoch": 1.79, + "learning_rate": 1.0581445061992305e-05, + "loss": 0.5118, + "step": 41930 + }, + { + "epoch": 1.79, + "learning_rate": 1.0560068405301411e-05, + "loss": 0.5195, + "step": 41940 + }, + { + "epoch": 1.79, + "learning_rate": 1.0538691748610517e-05, + "loss": 0.5482, + "step": 41950 + }, + { + "epoch": 1.79, + "learning_rate": 1.0517315091919625e-05, + "loss": 0.4987, + "step": 41960 + }, + { + "epoch": 1.79, + "learning_rate": 1.049593843522873e-05, + "loss": 0.5671, + "step": 41970 + }, + { + "epoch": 1.79, + "learning_rate": 1.0474561778537838e-05, + "loss": 0.5015, + "step": 41980 + }, + { + "epoch": 1.79, + "learning_rate": 1.0453185121846944e-05, + "loss": 0.5188, + "step": 41990 + }, + { + "epoch": 1.79, + "learning_rate": 1.043180846515605e-05, + "loss": 0.5486, + "step": 42000 + }, + { + "epoch": 1.79, + "learning_rate": 1.0410431808465156e-05, + "loss": 0.5005, + "step": 42010 + }, + { + "epoch": 1.79, + "learning_rate": 1.0389055151774262e-05, + "loss": 0.5609, + "step": 42020 + }, + { + "epoch": 1.79, + "learning_rate": 1.0367678495083369e-05, + "loss": 0.516, + "step": 42030 + }, + { + "epoch": 1.79, + "learning_rate": 1.0346301838392477e-05, + "loss": 0.5234, + "step": 42040 + }, + { + "epoch": 1.79, + "learning_rate": 1.0324925181701583e-05, + "loss": 0.5424, + "step": 42050 + }, + { + "epoch": 1.79, + "learning_rate": 1.0303548525010689e-05, + "loss": 0.4915, + "step": 42060 + }, + { + "epoch": 1.79, + "learning_rate": 1.0282171868319795e-05, + "loss": 0.5652, + "step": 42070 + }, + { + "epoch": 1.8, + "learning_rate": 1.0260795211628901e-05, + "loss": 0.5156, + "step": 42080 + }, + { + "epoch": 1.8, + "learning_rate": 1.023941855493801e-05, + "loss": 0.5266, + "step": 42090 + }, + { + "epoch": 1.8, + "learning_rate": 1.0218041898247114e-05, + "loss": 0.5479, + "step": 42100 + }, + { + "epoch": 1.8, + "learning_rate": 1.019666524155622e-05, + "loss": 0.5043, + "step": 42110 + }, + { + "epoch": 1.8, + "learning_rate": 1.0175288584865328e-05, + "loss": 0.5692, + "step": 42120 + }, + { + "epoch": 1.8, + "learning_rate": 1.0153911928174434e-05, + "loss": 0.5165, + "step": 42130 + }, + { + "epoch": 1.8, + "learning_rate": 1.013253527148354e-05, + "loss": 0.5144, + "step": 42140 + }, + { + "epoch": 1.8, + "learning_rate": 1.0111158614792647e-05, + "loss": 0.5476, + "step": 42150 + }, + { + "epoch": 1.8, + "learning_rate": 1.0089781958101753e-05, + "loss": 0.5122, + "step": 42160 + }, + { + "epoch": 1.8, + "learning_rate": 1.006840530141086e-05, + "loss": 0.5704, + "step": 42170 + }, + { + "epoch": 1.8, + "learning_rate": 1.0047028644719967e-05, + "loss": 0.5222, + "step": 42180 + }, + { + "epoch": 1.8, + "learning_rate": 1.0025651988029073e-05, + "loss": 0.5131, + "step": 42190 + }, + { + "epoch": 1.8, + "learning_rate": 1.000427533133818e-05, + "loss": 0.5537, + "step": 42200 + }, + { + "epoch": 1.8, + "learning_rate": 9.982898674647285e-06, + "loss": 0.5093, + "step": 42210 + }, + { + "epoch": 1.8, + "learning_rate": 9.961522017956392e-06, + "loss": 0.5591, + "step": 42220 + }, + { + "epoch": 1.8, + "learning_rate": 9.9401453612655e-06, + "loss": 0.5137, + "step": 42230 + }, + { + "epoch": 1.8, + "learning_rate": 9.918768704574604e-06, + "loss": 0.5159, + "step": 42240 + }, + { + "epoch": 1.8, + "learning_rate": 9.897392047883712e-06, + "loss": 0.5404, + "step": 42250 + }, + { + "epoch": 1.8, + "learning_rate": 9.876015391192818e-06, + "loss": 0.5019, + "step": 42260 + }, + { + "epoch": 1.8, + "learning_rate": 9.854638734501924e-06, + "loss": 0.5679, + "step": 42270 + }, + { + "epoch": 1.8, + "learning_rate": 9.83326207781103e-06, + "loss": 0.5113, + "step": 42280 + }, + { + "epoch": 1.8, + "learning_rate": 9.811885421120137e-06, + "loss": 0.53, + "step": 42290 + }, + { + "epoch": 1.8, + "learning_rate": 9.790508764429243e-06, + "loss": 0.5465, + "step": 42300 + }, + { + "epoch": 1.81, + "learning_rate": 9.769132107738351e-06, + "loss": 0.493, + "step": 42310 + }, + { + "epoch": 1.81, + "learning_rate": 9.747755451047457e-06, + "loss": 0.5612, + "step": 42320 + }, + { + "epoch": 1.81, + "learning_rate": 9.726378794356563e-06, + "loss": 0.5223, + "step": 42330 + }, + { + "epoch": 1.81, + "learning_rate": 9.70500213766567e-06, + "loss": 0.5253, + "step": 42340 + }, + { + "epoch": 1.81, + "learning_rate": 9.683625480974776e-06, + "loss": 0.5391, + "step": 42350 + }, + { + "epoch": 1.81, + "learning_rate": 9.662248824283884e-06, + "loss": 0.5063, + "step": 42360 + }, + { + "epoch": 1.81, + "learning_rate": 9.640872167592988e-06, + "loss": 0.5604, + "step": 42370 + }, + { + "epoch": 1.81, + "learning_rate": 9.619495510902094e-06, + "loss": 0.5145, + "step": 42380 + }, + { + "epoch": 1.81, + "learning_rate": 9.598118854211202e-06, + "loss": 0.5214, + "step": 42390 + }, + { + "epoch": 1.81, + "learning_rate": 9.576742197520308e-06, + "loss": 0.5511, + "step": 42400 + }, + { + "epoch": 1.81, + "learning_rate": 9.555365540829415e-06, + "loss": 0.4929, + "step": 42410 + }, + { + "epoch": 1.81, + "learning_rate": 9.53398888413852e-06, + "loss": 0.5647, + "step": 42420 + }, + { + "epoch": 1.81, + "learning_rate": 9.512612227447627e-06, + "loss": 0.521, + "step": 42430 + }, + { + "epoch": 1.81, + "learning_rate": 9.491235570756735e-06, + "loss": 0.5345, + "step": 42440 + }, + { + "epoch": 1.81, + "learning_rate": 9.469858914065841e-06, + "loss": 0.5393, + "step": 42450 + }, + { + "epoch": 1.81, + "learning_rate": 9.448482257374947e-06, + "loss": 0.5024, + "step": 42460 + }, + { + "epoch": 1.81, + "learning_rate": 9.427105600684054e-06, + "loss": 0.5668, + "step": 42470 + }, + { + "epoch": 1.81, + "learning_rate": 9.40572894399316e-06, + "loss": 0.516, + "step": 42480 + }, + { + "epoch": 1.81, + "learning_rate": 9.384352287302266e-06, + "loss": 0.5233, + "step": 42490 + }, + { + "epoch": 1.81, + "learning_rate": 9.362975630611374e-06, + "loss": 0.5451, + "step": 42500 + }, + { + "epoch": 1.81, + "learning_rate": 9.341598973920478e-06, + "loss": 0.5121, + "step": 42510 + }, + { + "epoch": 1.81, + "learning_rate": 9.320222317229586e-06, + "loss": 0.5625, + "step": 42520 + }, + { + "epoch": 1.81, + "learning_rate": 9.298845660538692e-06, + "loss": 0.5146, + "step": 42530 + }, + { + "epoch": 1.81, + "learning_rate": 9.277469003847799e-06, + "loss": 0.5248, + "step": 42540 + }, + { + "epoch": 1.82, + "learning_rate": 9.256092347156905e-06, + "loss": 0.5458, + "step": 42550 + }, + { + "epoch": 1.82, + "learning_rate": 9.234715690466011e-06, + "loss": 0.5039, + "step": 42560 + }, + { + "epoch": 1.82, + "learning_rate": 9.213339033775117e-06, + "loss": 0.5661, + "step": 42570 + }, + { + "epoch": 1.82, + "learning_rate": 9.191962377084225e-06, + "loss": 0.5119, + "step": 42580 + }, + { + "epoch": 1.82, + "learning_rate": 9.170585720393331e-06, + "loss": 0.5182, + "step": 42590 + }, + { + "epoch": 1.82, + "learning_rate": 9.149209063702438e-06, + "loss": 0.559, + "step": 42600 + }, + { + "epoch": 1.82, + "learning_rate": 9.127832407011544e-06, + "loss": 0.5081, + "step": 42610 + }, + { + "epoch": 1.82, + "learning_rate": 9.10645575032065e-06, + "loss": 0.5762, + "step": 42620 + }, + { + "epoch": 1.82, + "learning_rate": 9.085079093629758e-06, + "loss": 0.5034, + "step": 42630 + }, + { + "epoch": 1.82, + "learning_rate": 9.063702436938862e-06, + "loss": 0.5192, + "step": 42640 + }, + { + "epoch": 1.82, + "learning_rate": 9.042325780247969e-06, + "loss": 0.5505, + "step": 42650 + }, + { + "epoch": 1.82, + "learning_rate": 9.020949123557077e-06, + "loss": 0.5026, + "step": 42660 + }, + { + "epoch": 1.82, + "learning_rate": 8.999572466866183e-06, + "loss": 0.5607, + "step": 42670 + }, + { + "epoch": 1.82, + "learning_rate": 8.978195810175289e-06, + "loss": 0.5174, + "step": 42680 + }, + { + "epoch": 1.82, + "learning_rate": 8.956819153484395e-06, + "loss": 0.5171, + "step": 42690 + }, + { + "epoch": 1.82, + "learning_rate": 8.935442496793501e-06, + "loss": 0.5517, + "step": 42700 + }, + { + "epoch": 1.82, + "learning_rate": 8.91406584010261e-06, + "loss": 0.5065, + "step": 42710 + }, + { + "epoch": 1.82, + "learning_rate": 8.892689183411715e-06, + "loss": 0.5643, + "step": 42720 + }, + { + "epoch": 1.82, + "learning_rate": 8.871312526720822e-06, + "loss": 0.5147, + "step": 42730 + }, + { + "epoch": 1.82, + "learning_rate": 8.849935870029928e-06, + "loss": 0.5222, + "step": 42740 + }, + { + "epoch": 1.82, + "learning_rate": 8.828559213339034e-06, + "loss": 0.5503, + "step": 42750 + }, + { + "epoch": 1.82, + "learning_rate": 8.80718255664814e-06, + "loss": 0.4987, + "step": 42760 + }, + { + "epoch": 1.82, + "learning_rate": 8.785805899957248e-06, + "loss": 0.5692, + "step": 42770 + }, + { + "epoch": 1.83, + "learning_rate": 8.764429243266353e-06, + "loss": 0.52, + "step": 42780 + }, + { + "epoch": 1.83, + "learning_rate": 8.74305258657546e-06, + "loss": 0.524, + "step": 42790 + }, + { + "epoch": 1.83, + "learning_rate": 8.721675929884567e-06, + "loss": 0.5374, + "step": 42800 + }, + { + "epoch": 1.83, + "learning_rate": 8.700299273193673e-06, + "loss": 0.4956, + "step": 42810 + }, + { + "epoch": 1.83, + "learning_rate": 8.67892261650278e-06, + "loss": 0.559, + "step": 42820 + }, + { + "epoch": 1.83, + "learning_rate": 8.657545959811885e-06, + "loss": 0.5066, + "step": 42830 + }, + { + "epoch": 1.83, + "learning_rate": 8.636169303120992e-06, + "loss": 0.515, + "step": 42840 + }, + { + "epoch": 1.83, + "learning_rate": 8.6147926464301e-06, + "loss": 0.5417, + "step": 42850 + }, + { + "epoch": 1.83, + "learning_rate": 8.593415989739206e-06, + "loss": 0.4989, + "step": 42860 + }, + { + "epoch": 1.83, + "learning_rate": 8.572039333048312e-06, + "loss": 0.557, + "step": 42870 + }, + { + "epoch": 1.83, + "learning_rate": 8.550662676357418e-06, + "loss": 0.5143, + "step": 42880 + }, + { + "epoch": 1.83, + "learning_rate": 8.529286019666524e-06, + "loss": 0.5236, + "step": 42890 + }, + { + "epoch": 1.83, + "learning_rate": 8.507909362975632e-06, + "loss": 0.5459, + "step": 42900 + }, + { + "epoch": 1.83, + "learning_rate": 8.486532706284737e-06, + "loss": 0.5086, + "step": 42910 + }, + { + "epoch": 1.83, + "learning_rate": 8.465156049593843e-06, + "loss": 0.5672, + "step": 42920 + }, + { + "epoch": 1.83, + "learning_rate": 8.44377939290295e-06, + "loss": 0.5162, + "step": 42930 + }, + { + "epoch": 1.83, + "learning_rate": 8.422402736212057e-06, + "loss": 0.5181, + "step": 42940 + }, + { + "epoch": 1.83, + "learning_rate": 8.401026079521163e-06, + "loss": 0.5462, + "step": 42950 + }, + { + "epoch": 1.83, + "learning_rate": 8.37964942283027e-06, + "loss": 0.5016, + "step": 42960 + }, + { + "epoch": 1.83, + "learning_rate": 8.358272766139376e-06, + "loss": 0.5667, + "step": 42970 + }, + { + "epoch": 1.83, + "learning_rate": 8.336896109448484e-06, + "loss": 0.5093, + "step": 42980 + }, + { + "epoch": 1.83, + "learning_rate": 8.31551945275759e-06, + "loss": 0.523, + "step": 42990 + }, + { + "epoch": 1.83, + "learning_rate": 8.294142796066694e-06, + "loss": 0.5536, + "step": 43000 + }, + { + "epoch": 1.83, + "learning_rate": 8.272766139375802e-06, + "loss": 0.5025, + "step": 43010 + }, + { + "epoch": 1.84, + "learning_rate": 8.251389482684908e-06, + "loss": 0.5581, + "step": 43020 + }, + { + "epoch": 1.84, + "learning_rate": 8.230012825994015e-06, + "loss": 0.5187, + "step": 43030 + }, + { + "epoch": 1.84, + "learning_rate": 8.208636169303122e-06, + "loss": 0.5189, + "step": 43040 + }, + { + "epoch": 1.84, + "learning_rate": 8.187259512612227e-06, + "loss": 0.5461, + "step": 43050 + }, + { + "epoch": 1.84, + "learning_rate": 8.165882855921335e-06, + "loss": 0.5043, + "step": 43060 + }, + { + "epoch": 1.84, + "learning_rate": 8.144506199230441e-06, + "loss": 0.5663, + "step": 43070 + }, + { + "epoch": 1.84, + "learning_rate": 8.123129542539547e-06, + "loss": 0.5221, + "step": 43080 + }, + { + "epoch": 1.84, + "learning_rate": 8.101752885848653e-06, + "loss": 0.5205, + "step": 43090 + }, + { + "epoch": 1.84, + "learning_rate": 8.08037622915776e-06, + "loss": 0.5404, + "step": 43100 + }, + { + "epoch": 1.84, + "learning_rate": 8.058999572466866e-06, + "loss": 0.502, + "step": 43110 + }, + { + "epoch": 1.84, + "learning_rate": 8.037622915775974e-06, + "loss": 0.5652, + "step": 43120 + }, + { + "epoch": 1.84, + "learning_rate": 8.01624625908508e-06, + "loss": 0.5164, + "step": 43130 + }, + { + "epoch": 1.84, + "learning_rate": 7.994869602394186e-06, + "loss": 0.5173, + "step": 43140 + }, + { + "epoch": 1.84, + "learning_rate": 7.973492945703292e-06, + "loss": 0.5519, + "step": 43150 + }, + { + "epoch": 1.84, + "learning_rate": 7.952116289012399e-06, + "loss": 0.5036, + "step": 43160 + }, + { + "epoch": 1.84, + "learning_rate": 7.930739632321507e-06, + "loss": 0.563, + "step": 43170 + }, + { + "epoch": 1.84, + "learning_rate": 7.909362975630611e-06, + "loss": 0.5159, + "step": 43180 + }, + { + "epoch": 1.84, + "learning_rate": 7.887986318939717e-06, + "loss": 0.5354, + "step": 43190 + }, + { + "epoch": 1.84, + "learning_rate": 7.866609662248825e-06, + "loss": 0.5548, + "step": 43200 + }, + { + "epoch": 1.84, + "learning_rate": 7.845233005557931e-06, + "loss": 0.5043, + "step": 43210 + }, + { + "epoch": 1.84, + "learning_rate": 7.823856348867038e-06, + "loss": 0.5604, + "step": 43220 + }, + { + "epoch": 1.84, + "learning_rate": 7.802479692176144e-06, + "loss": 0.5163, + "step": 43230 + }, + { + "epoch": 1.84, + "learning_rate": 7.78110303548525e-06, + "loss": 0.5172, + "step": 43240 + }, + { + "epoch": 1.85, + "learning_rate": 7.759726378794358e-06, + "loss": 0.5485, + "step": 43250 + }, + { + "epoch": 1.85, + "learning_rate": 7.738349722103464e-06, + "loss": 0.5004, + "step": 43260 + }, + { + "epoch": 1.85, + "learning_rate": 7.716973065412569e-06, + "loss": 0.5551, + "step": 43270 + }, + { + "epoch": 1.85, + "learning_rate": 7.695596408721676e-06, + "loss": 0.5127, + "step": 43280 + }, + { + "epoch": 1.85, + "learning_rate": 7.674219752030783e-06, + "loss": 0.5253, + "step": 43290 + }, + { + "epoch": 1.85, + "learning_rate": 7.652843095339889e-06, + "loss": 0.5409, + "step": 43300 + }, + { + "epoch": 1.85, + "learning_rate": 7.631466438648995e-06, + "loss": 0.5021, + "step": 43310 + }, + { + "epoch": 1.85, + "learning_rate": 7.610089781958102e-06, + "loss": 0.5556, + "step": 43320 + }, + { + "epoch": 1.85, + "learning_rate": 7.588713125267209e-06, + "loss": 0.5181, + "step": 43330 + }, + { + "epoch": 1.85, + "learning_rate": 7.567336468576315e-06, + "loss": 0.5273, + "step": 43340 + }, + { + "epoch": 1.85, + "learning_rate": 7.545959811885421e-06, + "loss": 0.5626, + "step": 43350 + }, + { + "epoch": 1.85, + "learning_rate": 7.524583155194529e-06, + "loss": 0.4999, + "step": 43360 + }, + { + "epoch": 1.85, + "learning_rate": 7.503206498503634e-06, + "loss": 0.5666, + "step": 43370 + }, + { + "epoch": 1.85, + "learning_rate": 7.48182984181274e-06, + "loss": 0.5158, + "step": 43380 + }, + { + "epoch": 1.85, + "learning_rate": 7.460453185121847e-06, + "loss": 0.524, + "step": 43390 + }, + { + "epoch": 1.85, + "learning_rate": 7.4390765284309535e-06, + "loss": 0.5447, + "step": 43400 + }, + { + "epoch": 1.85, + "learning_rate": 7.4176998717400605e-06, + "loss": 0.5059, + "step": 43410 + }, + { + "epoch": 1.85, + "learning_rate": 7.396323215049167e-06, + "loss": 0.5517, + "step": 43420 + }, + { + "epoch": 1.85, + "learning_rate": 7.374946558358273e-06, + "loss": 0.5059, + "step": 43430 + }, + { + "epoch": 1.85, + "learning_rate": 7.35356990166738e-06, + "loss": 0.5145, + "step": 43440 + }, + { + "epoch": 1.85, + "learning_rate": 7.332193244976486e-06, + "loss": 0.5574, + "step": 43450 + }, + { + "epoch": 1.85, + "learning_rate": 7.3108165882855915e-06, + "loss": 0.5033, + "step": 43460 + }, + { + "epoch": 1.85, + "learning_rate": 7.2894399315946994e-06, + "loss": 0.5653, + "step": 43470 + }, + { + "epoch": 1.85, + "learning_rate": 7.268063274903806e-06, + "loss": 0.5059, + "step": 43480 + }, + { + "epoch": 1.86, + "learning_rate": 7.246686618212911e-06, + "loss": 0.5133, + "step": 43490 + }, + { + "epoch": 1.86, + "learning_rate": 7.225309961522019e-06, + "loss": 0.5418, + "step": 43500 + }, + { + "epoch": 1.86, + "learning_rate": 7.203933304831124e-06, + "loss": 0.5009, + "step": 43510 + }, + { + "epoch": 1.86, + "learning_rate": 7.182556648140232e-06, + "loss": 0.5598, + "step": 43520 + }, + { + "epoch": 1.86, + "learning_rate": 7.1611799914493375e-06, + "loss": 0.517, + "step": 43530 + }, + { + "epoch": 1.86, + "learning_rate": 7.139803334758444e-06, + "loss": 0.5139, + "step": 43540 + }, + { + "epoch": 1.86, + "learning_rate": 7.118426678067551e-06, + "loss": 0.5522, + "step": 43550 + }, + { + "epoch": 1.86, + "learning_rate": 7.097050021376657e-06, + "loss": 0.5017, + "step": 43560 + }, + { + "epoch": 1.86, + "learning_rate": 7.075673364685763e-06, + "loss": 0.5556, + "step": 43570 + }, + { + "epoch": 1.86, + "learning_rate": 7.05429670799487e-06, + "loss": 0.5191, + "step": 43580 + }, + { + "epoch": 1.86, + "learning_rate": 7.0329200513039764e-06, + "loss": 0.5124, + "step": 43590 + }, + { + "epoch": 1.86, + "learning_rate": 7.0115433946130835e-06, + "loss": 0.5473, + "step": 43600 + }, + { + "epoch": 1.86, + "learning_rate": 6.99016673792219e-06, + "loss": 0.5112, + "step": 43610 + }, + { + "epoch": 1.86, + "learning_rate": 6.968790081231295e-06, + "loss": 0.5667, + "step": 43620 + }, + { + "epoch": 1.86, + "learning_rate": 6.947413424540403e-06, + "loss": 0.5144, + "step": 43630 + }, + { + "epoch": 1.86, + "learning_rate": 6.926036767849508e-06, + "loss": 0.5123, + "step": 43640 + }, + { + "epoch": 1.86, + "learning_rate": 6.9046601111586145e-06, + "loss": 0.5561, + "step": 43650 + }, + { + "epoch": 1.86, + "learning_rate": 6.8832834544677216e-06, + "loss": 0.5013, + "step": 43660 + }, + { + "epoch": 1.86, + "learning_rate": 6.861906797776828e-06, + "loss": 0.5632, + "step": 43670 + }, + { + "epoch": 1.86, + "learning_rate": 6.840530141085935e-06, + "loss": 0.5162, + "step": 43680 + }, + { + "epoch": 1.86, + "learning_rate": 6.819153484395041e-06, + "loss": 0.522, + "step": 43690 + }, + { + "epoch": 1.86, + "learning_rate": 6.797776827704147e-06, + "loss": 0.5467, + "step": 43700 + }, + { + "epoch": 1.86, + "learning_rate": 6.776400171013254e-06, + "loss": 0.506, + "step": 43710 + }, + { + "epoch": 1.87, + "learning_rate": 6.7550235143223605e-06, + "loss": 0.5666, + "step": 43720 + }, + { + "epoch": 1.87, + "learning_rate": 6.733646857631466e-06, + "loss": 0.5142, + "step": 43730 + }, + { + "epoch": 1.87, + "learning_rate": 6.712270200940574e-06, + "loss": 0.5148, + "step": 43740 + }, + { + "epoch": 1.87, + "learning_rate": 6.690893544249679e-06, + "loss": 0.5491, + "step": 43750 + }, + { + "epoch": 1.87, + "learning_rate": 6.669516887558785e-06, + "loss": 0.5048, + "step": 43760 + }, + { + "epoch": 1.87, + "learning_rate": 6.648140230867892e-06, + "loss": 0.568, + "step": 43770 + }, + { + "epoch": 1.87, + "learning_rate": 6.6267635741769986e-06, + "loss": 0.51, + "step": 43780 + }, + { + "epoch": 1.87, + "learning_rate": 6.6053869174861065e-06, + "loss": 0.5169, + "step": 43790 + }, + { + "epoch": 1.87, + "learning_rate": 6.584010260795212e-06, + "loss": 0.5524, + "step": 43800 + }, + { + "epoch": 1.87, + "learning_rate": 6.562633604104318e-06, + "loss": 0.4972, + "step": 43810 + }, + { + "epoch": 1.87, + "learning_rate": 6.541256947413425e-06, + "loss": 0.5626, + "step": 43820 + }, + { + "epoch": 1.87, + "learning_rate": 6.519880290722531e-06, + "loss": 0.5136, + "step": 43830 + }, + { + "epoch": 1.87, + "learning_rate": 6.4985036340316375e-06, + "loss": 0.5139, + "step": 43840 + }, + { + "epoch": 1.87, + "learning_rate": 6.4771269773407445e-06, + "loss": 0.5448, + "step": 43850 + }, + { + "epoch": 1.87, + "learning_rate": 6.455750320649851e-06, + "loss": 0.504, + "step": 43860 + }, + { + "epoch": 1.87, + "learning_rate": 6.434373663958958e-06, + "loss": 0.5622, + "step": 43870 + }, + { + "epoch": 1.87, + "learning_rate": 6.412997007268064e-06, + "loss": 0.5169, + "step": 43880 + }, + { + "epoch": 1.87, + "learning_rate": 6.391620350577169e-06, + "loss": 0.5125, + "step": 43890 + }, + { + "epoch": 1.87, + "learning_rate": 6.370243693886277e-06, + "loss": 0.5451, + "step": 43900 + }, + { + "epoch": 1.87, + "learning_rate": 6.348867037195383e-06, + "loss": 0.5, + "step": 43910 + }, + { + "epoch": 1.87, + "learning_rate": 6.327490380504489e-06, + "loss": 0.5554, + "step": 43920 + }, + { + "epoch": 1.87, + "learning_rate": 6.306113723813596e-06, + "loss": 0.5153, + "step": 43930 + }, + { + "epoch": 1.87, + "learning_rate": 6.284737067122702e-06, + "loss": 0.5142, + "step": 43940 + }, + { + "epoch": 1.87, + "learning_rate": 6.263360410431809e-06, + "loss": 0.5491, + "step": 43950 + }, + { + "epoch": 1.88, + "learning_rate": 6.241983753740915e-06, + "loss": 0.5065, + "step": 43960 + }, + { + "epoch": 1.88, + "learning_rate": 6.220607097050022e-06, + "loss": 0.5663, + "step": 43970 + }, + { + "epoch": 1.88, + "learning_rate": 6.199230440359128e-06, + "loss": 0.5183, + "step": 43980 + }, + { + "epoch": 1.88, + "learning_rate": 6.177853783668235e-06, + "loss": 0.5178, + "step": 43990 + }, + { + "epoch": 1.88, + "learning_rate": 6.156477126977341e-06, + "loss": 0.5539, + "step": 44000 + }, + { + "epoch": 1.88, + "learning_rate": 6.135100470286447e-06, + "loss": 0.5025, + "step": 44010 + }, + { + "epoch": 1.88, + "learning_rate": 6.113723813595553e-06, + "loss": 0.5581, + "step": 44020 + }, + { + "epoch": 1.88, + "learning_rate": 6.0923471569046605e-06, + "loss": 0.5098, + "step": 44030 + }, + { + "epoch": 1.88, + "learning_rate": 6.070970500213767e-06, + "loss": 0.5179, + "step": 44040 + }, + { + "epoch": 1.88, + "learning_rate": 6.049593843522873e-06, + "loss": 0.5497, + "step": 44050 + }, + { + "epoch": 1.88, + "learning_rate": 6.02821718683198e-06, + "loss": 0.5022, + "step": 44060 + }, + { + "epoch": 1.88, + "learning_rate": 6.006840530141086e-06, + "loss": 0.5635, + "step": 44070 + }, + { + "epoch": 1.88, + "learning_rate": 5.985463873450193e-06, + "loss": 0.5174, + "step": 44080 + }, + { + "epoch": 1.88, + "learning_rate": 5.9640872167592985e-06, + "loss": 0.5158, + "step": 44090 + }, + { + "epoch": 1.88, + "learning_rate": 5.942710560068406e-06, + "loss": 0.5526, + "step": 44100 + }, + { + "epoch": 1.88, + "learning_rate": 5.921333903377512e-06, + "loss": 0.4952, + "step": 44110 + }, + { + "epoch": 1.88, + "learning_rate": 5.899957246686619e-06, + "loss": 0.5581, + "step": 44120 + }, + { + "epoch": 1.88, + "learning_rate": 5.878580589995725e-06, + "loss": 0.5146, + "step": 44130 + }, + { + "epoch": 1.88, + "learning_rate": 5.857203933304831e-06, + "loss": 0.5283, + "step": 44140 + }, + { + "epoch": 1.88, + "learning_rate": 5.835827276613938e-06, + "loss": 0.5506, + "step": 44150 + }, + { + "epoch": 1.88, + "learning_rate": 5.8144506199230445e-06, + "loss": 0.501, + "step": 44160 + }, + { + "epoch": 1.88, + "learning_rate": 5.793073963232151e-06, + "loss": 0.5568, + "step": 44170 + }, + { + "epoch": 1.88, + "learning_rate": 5.771697306541257e-06, + "loss": 0.5214, + "step": 44180 + }, + { + "epoch": 1.89, + "learning_rate": 5.750320649850364e-06, + "loss": 0.5237, + "step": 44190 + }, + { + "epoch": 1.89, + "learning_rate": 5.72894399315947e-06, + "loss": 0.552, + "step": 44200 + }, + { + "epoch": 1.89, + "learning_rate": 5.707567336468576e-06, + "loss": 0.5071, + "step": 44210 + }, + { + "epoch": 1.89, + "learning_rate": 5.6861906797776834e-06, + "loss": 0.5658, + "step": 44220 + }, + { + "epoch": 1.89, + "learning_rate": 5.66481402308679e-06, + "loss": 0.5065, + "step": 44230 + }, + { + "epoch": 1.89, + "learning_rate": 5.643437366395897e-06, + "loss": 0.512, + "step": 44240 + }, + { + "epoch": 1.89, + "learning_rate": 5.622060709705002e-06, + "loss": 0.5527, + "step": 44250 + }, + { + "epoch": 1.89, + "learning_rate": 5.600684053014109e-06, + "loss": 0.5062, + "step": 44260 + }, + { + "epoch": 1.89, + "learning_rate": 5.579307396323215e-06, + "loss": 0.5532, + "step": 44270 + }, + { + "epoch": 1.89, + "learning_rate": 5.5579307396323215e-06, + "loss": 0.5115, + "step": 44280 + }, + { + "epoch": 1.89, + "learning_rate": 5.536554082941428e-06, + "loss": 0.5196, + "step": 44290 + }, + { + "epoch": 1.89, + "learning_rate": 5.515177426250535e-06, + "loss": 0.5455, + "step": 44300 + }, + { + "epoch": 1.89, + "learning_rate": 5.493800769559641e-06, + "loss": 0.4997, + "step": 44310 + }, + { + "epoch": 1.89, + "learning_rate": 5.472424112868747e-06, + "loss": 0.5656, + "step": 44320 + }, + { + "epoch": 1.89, + "learning_rate": 5.451047456177854e-06, + "loss": 0.5156, + "step": 44330 + }, + { + "epoch": 1.89, + "learning_rate": 5.4296707994869604e-06, + "loss": 0.5248, + "step": 44340 + }, + { + "epoch": 1.89, + "learning_rate": 5.4082941427960675e-06, + "loss": 0.549, + "step": 44350 + }, + { + "epoch": 1.89, + "learning_rate": 5.386917486105173e-06, + "loss": 0.5031, + "step": 44360 + }, + { + "epoch": 1.89, + "learning_rate": 5.36554082941428e-06, + "loss": 0.5632, + "step": 44370 + }, + { + "epoch": 1.89, + "learning_rate": 5.344164172723386e-06, + "loss": 0.5269, + "step": 44380 + }, + { + "epoch": 1.89, + "learning_rate": 5.322787516032493e-06, + "loss": 0.514, + "step": 44390 + }, + { + "epoch": 1.89, + "learning_rate": 5.301410859341599e-06, + "loss": 0.5573, + "step": 44400 + }, + { + "epoch": 1.89, + "learning_rate": 5.2800342026507056e-06, + "loss": 0.4945, + "step": 44410 + }, + { + "epoch": 1.9, + "learning_rate": 5.258657545959813e-06, + "loss": 0.5596, + "step": 44420 + }, + { + "epoch": 1.9, + "learning_rate": 5.237280889268919e-06, + "loss": 0.5104, + "step": 44430 + }, + { + "epoch": 1.9, + "learning_rate": 5.215904232578025e-06, + "loss": 0.5142, + "step": 44440 + }, + { + "epoch": 1.9, + "learning_rate": 5.194527575887131e-06, + "loss": 0.5499, + "step": 44450 + }, + { + "epoch": 1.9, + "learning_rate": 5.173150919196238e-06, + "loss": 0.4981, + "step": 44460 + }, + { + "epoch": 1.9, + "learning_rate": 5.1517742625053445e-06, + "loss": 0.5667, + "step": 44470 + }, + { + "epoch": 1.9, + "learning_rate": 5.130397605814451e-06, + "loss": 0.5113, + "step": 44480 + }, + { + "epoch": 1.9, + "learning_rate": 5.109020949123557e-06, + "loss": 0.5228, + "step": 44490 + }, + { + "epoch": 1.9, + "learning_rate": 5.087644292432664e-06, + "loss": 0.5447, + "step": 44500 + }, + { + "epoch": 1.9, + "learning_rate": 5.06626763574177e-06, + "loss": 0.5033, + "step": 44510 + }, + { + "epoch": 1.9, + "learning_rate": 5.044890979050876e-06, + "loss": 0.557, + "step": 44520 + }, + { + "epoch": 1.9, + "learning_rate": 5.023514322359983e-06, + "loss": 0.5137, + "step": 44530 + }, + { + "epoch": 1.9, + "learning_rate": 5.00213766566909e-06, + "loss": 0.5154, + "step": 44540 + }, + { + "epoch": 1.9, + "learning_rate": 4.980761008978196e-06, + "loss": 0.5396, + "step": 44550 + }, + { + "epoch": 1.9, + "learning_rate": 4.959384352287302e-06, + "loss": 0.5082, + "step": 44560 + }, + { + "epoch": 1.9, + "learning_rate": 4.938007695596409e-06, + "loss": 0.5665, + "step": 44570 + }, + { + "epoch": 1.9, + "learning_rate": 4.916631038905515e-06, + "loss": 0.5162, + "step": 44580 + }, + { + "epoch": 1.9, + "learning_rate": 4.8952543822146215e-06, + "loss": 0.5198, + "step": 44590 + }, + { + "epoch": 1.9, + "learning_rate": 4.8738777255237285e-06, + "loss": 0.5526, + "step": 44600 + }, + { + "epoch": 1.9, + "learning_rate": 4.852501068832835e-06, + "loss": 0.4984, + "step": 44610 + }, + { + "epoch": 1.9, + "learning_rate": 4.831124412141942e-06, + "loss": 0.5647, + "step": 44620 + }, + { + "epoch": 1.9, + "learning_rate": 4.809747755451047e-06, + "loss": 0.5077, + "step": 44630 + }, + { + "epoch": 1.9, + "learning_rate": 4.788371098760154e-06, + "loss": 0.5248, + "step": 44640 + }, + { + "epoch": 1.9, + "learning_rate": 4.76699444206926e-06, + "loss": 0.5499, + "step": 44650 + }, + { + "epoch": 1.91, + "learning_rate": 4.7456177853783675e-06, + "loss": 0.5014, + "step": 44660 + }, + { + "epoch": 1.91, + "learning_rate": 4.724241128687474e-06, + "loss": 0.5595, + "step": 44670 + }, + { + "epoch": 1.91, + "learning_rate": 4.70286447199658e-06, + "loss": 0.5133, + "step": 44680 + }, + { + "epoch": 1.91, + "learning_rate": 4.681487815305687e-06, + "loss": 0.5327, + "step": 44690 + }, + { + "epoch": 1.91, + "learning_rate": 4.660111158614793e-06, + "loss": 0.541, + "step": 44700 + }, + { + "epoch": 1.91, + "learning_rate": 4.638734501923899e-06, + "loss": 0.4943, + "step": 44710 + }, + { + "epoch": 1.91, + "learning_rate": 4.6173578452330055e-06, + "loss": 0.5684, + "step": 44720 + }, + { + "epoch": 1.91, + "learning_rate": 4.595981188542113e-06, + "loss": 0.5179, + "step": 44730 + }, + { + "epoch": 1.91, + "learning_rate": 4.574604531851219e-06, + "loss": 0.5231, + "step": 44740 + }, + { + "epoch": 1.91, + "learning_rate": 4.553227875160325e-06, + "loss": 0.5346, + "step": 44750 + }, + { + "epoch": 1.91, + "learning_rate": 4.531851218469431e-06, + "loss": 0.5048, + "step": 44760 + }, + { + "epoch": 1.91, + "learning_rate": 4.510474561778538e-06, + "loss": 0.5601, + "step": 44770 + }, + { + "epoch": 1.91, + "learning_rate": 4.4890979050876445e-06, + "loss": 0.5118, + "step": 44780 + }, + { + "epoch": 1.91, + "learning_rate": 4.467721248396751e-06, + "loss": 0.5176, + "step": 44790 + }, + { + "epoch": 1.91, + "learning_rate": 4.446344591705858e-06, + "loss": 0.5437, + "step": 44800 + }, + { + "epoch": 1.91, + "learning_rate": 4.424967935014964e-06, + "loss": 0.5068, + "step": 44810 + }, + { + "epoch": 1.91, + "learning_rate": 4.40359127832407e-06, + "loss": 0.5567, + "step": 44820 + }, + { + "epoch": 1.91, + "learning_rate": 4.382214621633176e-06, + "loss": 0.5122, + "step": 44830 + }, + { + "epoch": 1.91, + "learning_rate": 4.360837964942283e-06, + "loss": 0.5219, + "step": 44840 + }, + { + "epoch": 1.91, + "learning_rate": 4.33946130825139e-06, + "loss": 0.5458, + "step": 44850 + }, + { + "epoch": 1.91, + "learning_rate": 4.318084651560496e-06, + "loss": 0.5036, + "step": 44860 + }, + { + "epoch": 1.91, + "learning_rate": 4.296707994869603e-06, + "loss": 0.5615, + "step": 44870 + }, + { + "epoch": 1.91, + "learning_rate": 4.275331338178709e-06, + "loss": 0.5088, + "step": 44880 + }, + { + "epoch": 1.92, + "learning_rate": 4.253954681487816e-06, + "loss": 0.5225, + "step": 44890 + }, + { + "epoch": 1.92, + "learning_rate": 4.2325780247969215e-06, + "loss": 0.5501, + "step": 44900 + }, + { + "epoch": 1.92, + "learning_rate": 4.2112013681060285e-06, + "loss": 0.4966, + "step": 44910 + }, + { + "epoch": 1.92, + "learning_rate": 4.189824711415135e-06, + "loss": 0.5606, + "step": 44920 + }, + { + "epoch": 1.92, + "learning_rate": 4.168448054724242e-06, + "loss": 0.5131, + "step": 44930 + }, + { + "epoch": 1.92, + "learning_rate": 4.147071398033347e-06, + "loss": 0.5167, + "step": 44940 + }, + { + "epoch": 1.92, + "learning_rate": 4.125694741342454e-06, + "loss": 0.5414, + "step": 44950 + }, + { + "epoch": 1.92, + "learning_rate": 4.104318084651561e-06, + "loss": 0.5018, + "step": 44960 + }, + { + "epoch": 1.92, + "learning_rate": 4.0829414279606674e-06, + "loss": 0.5527, + "step": 44970 + }, + { + "epoch": 1.92, + "learning_rate": 4.061564771269774e-06, + "loss": 0.5169, + "step": 44980 + }, + { + "epoch": 1.92, + "learning_rate": 4.04018811457888e-06, + "loss": 0.5262, + "step": 44990 + }, + { + "epoch": 1.92, + "learning_rate": 4.018811457887987e-06, + "loss": 0.551, + "step": 45000 + }, + { + "epoch": 1.92, + "learning_rate": 3.997434801197093e-06, + "loss": 0.5051, + "step": 45010 + }, + { + "epoch": 1.92, + "learning_rate": 3.976058144506199e-06, + "loss": 0.5705, + "step": 45020 + }, + { + "epoch": 1.92, + "learning_rate": 3.9546814878153055e-06, + "loss": 0.5099, + "step": 45030 + }, + { + "epoch": 1.92, + "learning_rate": 3.9333048311244126e-06, + "loss": 0.5173, + "step": 45040 + }, + { + "epoch": 1.92, + "learning_rate": 3.911928174433519e-06, + "loss": 0.5525, + "step": 45050 + }, + { + "epoch": 1.92, + "learning_rate": 3.890551517742625e-06, + "loss": 0.5022, + "step": 45060 + }, + { + "epoch": 1.92, + "learning_rate": 3.869174861051732e-06, + "loss": 0.5544, + "step": 45070 + }, + { + "epoch": 1.92, + "learning_rate": 3.847798204360838e-06, + "loss": 0.5133, + "step": 45080 + }, + { + "epoch": 1.92, + "learning_rate": 3.8264215476699444e-06, + "loss": 0.5148, + "step": 45090 + }, + { + "epoch": 1.92, + "learning_rate": 3.805044890979051e-06, + "loss": 0.5559, + "step": 45100 + }, + { + "epoch": 1.92, + "learning_rate": 3.7836682342881577e-06, + "loss": 0.4956, + "step": 45110 + }, + { + "epoch": 1.92, + "learning_rate": 3.7622915775972643e-06, + "loss": 0.5639, + "step": 45120 + }, + { + "epoch": 1.93, + "learning_rate": 3.74091492090637e-06, + "loss": 0.4992, + "step": 45130 + }, + { + "epoch": 1.93, + "learning_rate": 3.7195382642154767e-06, + "loss": 0.5274, + "step": 45140 + }, + { + "epoch": 1.93, + "learning_rate": 3.6981616075245834e-06, + "loss": 0.5568, + "step": 45150 + }, + { + "epoch": 1.93, + "learning_rate": 3.67678495083369e-06, + "loss": 0.496, + "step": 45160 + }, + { + "epoch": 1.93, + "learning_rate": 3.6554082941427958e-06, + "loss": 0.5591, + "step": 45170 + }, + { + "epoch": 1.93, + "learning_rate": 3.634031637451903e-06, + "loss": 0.5132, + "step": 45180 + }, + { + "epoch": 1.93, + "learning_rate": 3.6126549807610095e-06, + "loss": 0.5102, + "step": 45190 + }, + { + "epoch": 1.93, + "learning_rate": 3.591278324070116e-06, + "loss": 0.5412, + "step": 45200 + }, + { + "epoch": 1.93, + "learning_rate": 3.569901667379222e-06, + "loss": 0.4961, + "step": 45210 + }, + { + "epoch": 1.93, + "learning_rate": 3.5485250106883285e-06, + "loss": 0.556, + "step": 45220 + }, + { + "epoch": 1.93, + "learning_rate": 3.527148353997435e-06, + "loss": 0.5214, + "step": 45230 + }, + { + "epoch": 1.93, + "learning_rate": 3.5057716973065417e-06, + "loss": 0.52, + "step": 45240 + }, + { + "epoch": 1.93, + "learning_rate": 3.4843950406156475e-06, + "loss": 0.546, + "step": 45250 + }, + { + "epoch": 1.93, + "learning_rate": 3.463018383924754e-06, + "loss": 0.4999, + "step": 45260 + }, + { + "epoch": 1.93, + "learning_rate": 3.4416417272338608e-06, + "loss": 0.5628, + "step": 45270 + }, + { + "epoch": 1.93, + "learning_rate": 3.4202650705429674e-06, + "loss": 0.5185, + "step": 45280 + }, + { + "epoch": 1.93, + "learning_rate": 3.3988884138520736e-06, + "loss": 0.5123, + "step": 45290 + }, + { + "epoch": 1.93, + "learning_rate": 3.3775117571611802e-06, + "loss": 0.5503, + "step": 45300 + }, + { + "epoch": 1.93, + "learning_rate": 3.356135100470287e-06, + "loss": 0.5033, + "step": 45310 + }, + { + "epoch": 1.93, + "learning_rate": 3.3347584437793927e-06, + "loss": 0.5605, + "step": 45320 + }, + { + "epoch": 1.93, + "learning_rate": 3.3133817870884993e-06, + "loss": 0.5105, + "step": 45330 + }, + { + "epoch": 1.93, + "learning_rate": 3.292005130397606e-06, + "loss": 0.5252, + "step": 45340 + }, + { + "epoch": 1.93, + "learning_rate": 3.2706284737067125e-06, + "loss": 0.5512, + "step": 45350 + }, + { + "epoch": 1.94, + "learning_rate": 3.2492518170158187e-06, + "loss": 0.4962, + "step": 45360 + }, + { + "epoch": 1.94, + "learning_rate": 3.2278751603249254e-06, + "loss": 0.5559, + "step": 45370 + }, + { + "epoch": 1.94, + "learning_rate": 3.206498503634032e-06, + "loss": 0.5138, + "step": 45380 + }, + { + "epoch": 1.94, + "learning_rate": 3.1851218469431386e-06, + "loss": 0.5135, + "step": 45390 + }, + { + "epoch": 1.94, + "learning_rate": 3.1637451902522444e-06, + "loss": 0.5462, + "step": 45400 + }, + { + "epoch": 1.94, + "learning_rate": 3.142368533561351e-06, + "loss": 0.5033, + "step": 45410 + }, + { + "epoch": 1.94, + "learning_rate": 3.1209918768704577e-06, + "loss": 0.5678, + "step": 45420 + }, + { + "epoch": 1.94, + "learning_rate": 3.099615220179564e-06, + "loss": 0.5046, + "step": 45430 + }, + { + "epoch": 1.94, + "learning_rate": 3.0782385634886705e-06, + "loss": 0.5194, + "step": 45440 + }, + { + "epoch": 1.94, + "learning_rate": 3.0568619067977767e-06, + "loss": 0.5476, + "step": 45450 + }, + { + "epoch": 1.94, + "learning_rate": 3.0354852501068833e-06, + "loss": 0.5, + "step": 45460 + }, + { + "epoch": 1.94, + "learning_rate": 3.01410859341599e-06, + "loss": 0.5694, + "step": 45470 + }, + { + "epoch": 1.94, + "learning_rate": 2.9927319367250966e-06, + "loss": 0.5191, + "step": 45480 + }, + { + "epoch": 1.94, + "learning_rate": 2.971355280034203e-06, + "loss": 0.5117, + "step": 45490 + }, + { + "epoch": 1.94, + "learning_rate": 2.9499786233433094e-06, + "loss": 0.5498, + "step": 45500 + }, + { + "epoch": 1.94, + "learning_rate": 2.9286019666524156e-06, + "loss": 0.4985, + "step": 45510 + }, + { + "epoch": 1.94, + "learning_rate": 2.9072253099615223e-06, + "loss": 0.5649, + "step": 45520 + }, + { + "epoch": 1.94, + "learning_rate": 2.8858486532706285e-06, + "loss": 0.5135, + "step": 45530 + }, + { + "epoch": 1.94, + "learning_rate": 2.864471996579735e-06, + "loss": 0.5233, + "step": 45540 + }, + { + "epoch": 1.94, + "learning_rate": 2.8430953398888417e-06, + "loss": 0.5406, + "step": 45550 + }, + { + "epoch": 1.94, + "learning_rate": 2.8217186831979483e-06, + "loss": 0.5069, + "step": 45560 + }, + { + "epoch": 1.94, + "learning_rate": 2.8003420265070546e-06, + "loss": 0.5656, + "step": 45570 + }, + { + "epoch": 1.94, + "learning_rate": 2.7789653698161608e-06, + "loss": 0.5166, + "step": 45580 + }, + { + "epoch": 1.94, + "learning_rate": 2.7575887131252674e-06, + "loss": 0.5159, + "step": 45590 + }, + { + "epoch": 1.95, + "learning_rate": 2.7362120564343736e-06, + "loss": 0.5441, + "step": 45600 + }, + { + "epoch": 1.95, + "learning_rate": 2.7148353997434802e-06, + "loss": 0.499, + "step": 45610 + }, + { + "epoch": 1.95, + "learning_rate": 2.6934587430525864e-06, + "loss": 0.5624, + "step": 45620 + }, + { + "epoch": 1.95, + "learning_rate": 2.672082086361693e-06, + "loss": 0.5165, + "step": 45630 + }, + { + "epoch": 1.95, + "learning_rate": 2.6507054296707997e-06, + "loss": 0.5171, + "step": 45640 + }, + { + "epoch": 1.95, + "learning_rate": 2.6293287729799063e-06, + "loss": 0.5401, + "step": 45650 + }, + { + "epoch": 1.95, + "learning_rate": 2.6079521162890125e-06, + "loss": 0.5031, + "step": 45660 + }, + { + "epoch": 1.95, + "learning_rate": 2.586575459598119e-06, + "loss": 0.5583, + "step": 45670 + }, + { + "epoch": 1.95, + "learning_rate": 2.5651988029072253e-06, + "loss": 0.5144, + "step": 45680 + }, + { + "epoch": 1.95, + "learning_rate": 2.543822146216332e-06, + "loss": 0.5251, + "step": 45690 + }, + { + "epoch": 1.95, + "learning_rate": 2.522445489525438e-06, + "loss": 0.5468, + "step": 45700 + }, + { + "epoch": 1.95, + "learning_rate": 2.501068832834545e-06, + "loss": 0.502, + "step": 45710 + }, + { + "epoch": 1.95, + "learning_rate": 2.479692176143651e-06, + "loss": 0.5657, + "step": 45720 + }, + { + "epoch": 1.95, + "learning_rate": 2.4583155194527576e-06, + "loss": 0.5112, + "step": 45730 + }, + { + "epoch": 1.95, + "learning_rate": 2.4369388627618643e-06, + "loss": 0.5223, + "step": 45740 + }, + { + "epoch": 1.95, + "learning_rate": 2.415562206070971e-06, + "loss": 0.559, + "step": 45750 + }, + { + "epoch": 1.95, + "learning_rate": 2.394185549380077e-06, + "loss": 0.4934, + "step": 45760 + }, + { + "epoch": 1.95, + "learning_rate": 2.3728088926891837e-06, + "loss": 0.5538, + "step": 45770 + }, + { + "epoch": 1.95, + "learning_rate": 2.35143223599829e-06, + "loss": 0.5193, + "step": 45780 + }, + { + "epoch": 1.95, + "learning_rate": 2.3300555793073966e-06, + "loss": 0.5197, + "step": 45790 + }, + { + "epoch": 1.95, + "learning_rate": 2.3086789226165028e-06, + "loss": 0.5506, + "step": 45800 + }, + { + "epoch": 1.95, + "learning_rate": 2.2873022659256094e-06, + "loss": 0.5065, + "step": 45810 + }, + { + "epoch": 1.95, + "learning_rate": 2.2659256092347156e-06, + "loss": 0.5607, + "step": 45820 + }, + { + "epoch": 1.96, + "learning_rate": 2.2445489525438222e-06, + "loss": 0.5093, + "step": 45830 + }, + { + "epoch": 1.96, + "learning_rate": 2.223172295852929e-06, + "loss": 0.5257, + "step": 45840 + }, + { + "epoch": 1.96, + "learning_rate": 2.201795639162035e-06, + "loss": 0.5583, + "step": 45850 + }, + { + "epoch": 1.96, + "learning_rate": 2.1804189824711417e-06, + "loss": 0.499, + "step": 45860 + }, + { + "epoch": 1.96, + "learning_rate": 2.159042325780248e-06, + "loss": 0.5638, + "step": 45870 + }, + { + "epoch": 1.96, + "learning_rate": 2.1376656690893545e-06, + "loss": 0.519, + "step": 45880 + }, + { + "epoch": 1.96, + "learning_rate": 2.1162890123984607e-06, + "loss": 0.5166, + "step": 45890 + }, + { + "epoch": 1.96, + "learning_rate": 2.0949123557075674e-06, + "loss": 0.552, + "step": 45900 + }, + { + "epoch": 1.96, + "learning_rate": 2.0735356990166736e-06, + "loss": 0.5035, + "step": 45910 + }, + { + "epoch": 1.96, + "learning_rate": 2.0521590423257806e-06, + "loss": 0.5601, + "step": 45920 + }, + { + "epoch": 1.96, + "learning_rate": 2.030782385634887e-06, + "loss": 0.5117, + "step": 45930 + }, + { + "epoch": 1.96, + "learning_rate": 2.0094057289439935e-06, + "loss": 0.5137, + "step": 45940 + }, + { + "epoch": 1.96, + "learning_rate": 1.9880290722530997e-06, + "loss": 0.5462, + "step": 45950 + }, + { + "epoch": 1.96, + "learning_rate": 1.9666524155622063e-06, + "loss": 0.4989, + "step": 45960 + }, + { + "epoch": 1.96, + "learning_rate": 1.9452757588713125e-06, + "loss": 0.5567, + "step": 45970 + }, + { + "epoch": 1.96, + "learning_rate": 1.923899102180419e-06, + "loss": 0.5115, + "step": 45980 + }, + { + "epoch": 1.96, + "learning_rate": 1.9025224454895255e-06, + "loss": 0.511, + "step": 45990 + }, + { + "epoch": 1.96, + "learning_rate": 1.8811457887986322e-06, + "loss": 0.5463, + "step": 46000 + }, + { + "epoch": 1.96, + "learning_rate": 1.8597691321077384e-06, + "loss": 0.5069, + "step": 46010 + }, + { + "epoch": 1.96, + "learning_rate": 1.838392475416845e-06, + "loss": 0.5675, + "step": 46020 + }, + { + "epoch": 1.96, + "learning_rate": 1.8170158187259514e-06, + "loss": 0.519, + "step": 46030 + }, + { + "epoch": 1.96, + "learning_rate": 1.795639162035058e-06, + "loss": 0.5113, + "step": 46040 + }, + { + "epoch": 1.96, + "learning_rate": 1.7742625053441642e-06, + "loss": 0.5314, + "step": 46050 + }, + { + "epoch": 1.96, + "learning_rate": 1.7528858486532709e-06, + "loss": 0.5022, + "step": 46060 + }, + { + "epoch": 1.97, + "learning_rate": 1.731509191962377e-06, + "loss": 0.5652, + "step": 46070 + }, + { + "epoch": 1.97, + "learning_rate": 1.7101325352714837e-06, + "loss": 0.5118, + "step": 46080 + }, + { + "epoch": 1.97, + "learning_rate": 1.6887558785805901e-06, + "loss": 0.5104, + "step": 46090 + }, + { + "epoch": 1.97, + "learning_rate": 1.6673792218896963e-06, + "loss": 0.5496, + "step": 46100 + }, + { + "epoch": 1.97, + "learning_rate": 1.646002565198803e-06, + "loss": 0.5052, + "step": 46110 + }, + { + "epoch": 1.97, + "learning_rate": 1.6246259085079094e-06, + "loss": 0.5558, + "step": 46120 + }, + { + "epoch": 1.97, + "learning_rate": 1.603249251817016e-06, + "loss": 0.5149, + "step": 46130 + }, + { + "epoch": 1.97, + "learning_rate": 1.5818725951261222e-06, + "loss": 0.5134, + "step": 46140 + }, + { + "epoch": 1.97, + "learning_rate": 1.5604959384352288e-06, + "loss": 0.5534, + "step": 46150 + }, + { + "epoch": 1.97, + "learning_rate": 1.5391192817443353e-06, + "loss": 0.4978, + "step": 46160 + }, + { + "epoch": 1.97, + "learning_rate": 1.5177426250534417e-06, + "loss": 0.553, + "step": 46170 + }, + { + "epoch": 1.97, + "learning_rate": 1.4963659683625483e-06, + "loss": 0.5211, + "step": 46180 + }, + { + "epoch": 1.97, + "learning_rate": 1.4749893116716547e-06, + "loss": 0.5135, + "step": 46190 + }, + { + "epoch": 1.97, + "learning_rate": 1.4536126549807611e-06, + "loss": 0.5457, + "step": 46200 + }, + { + "epoch": 1.97, + "learning_rate": 1.4322359982898675e-06, + "loss": 0.4989, + "step": 46210 + }, + { + "epoch": 1.97, + "learning_rate": 1.4108593415989742e-06, + "loss": 0.5647, + "step": 46220 + }, + { + "epoch": 1.97, + "learning_rate": 1.3894826849080804e-06, + "loss": 0.5102, + "step": 46230 + }, + { + "epoch": 1.97, + "learning_rate": 1.3681060282171868e-06, + "loss": 0.5151, + "step": 46240 + }, + { + "epoch": 1.97, + "learning_rate": 1.3467293715262932e-06, + "loss": 0.5475, + "step": 46250 + }, + { + "epoch": 1.97, + "learning_rate": 1.3253527148353998e-06, + "loss": 0.502, + "step": 46260 + }, + { + "epoch": 1.97, + "learning_rate": 1.3039760581445063e-06, + "loss": 0.5578, + "step": 46270 + }, + { + "epoch": 1.97, + "learning_rate": 1.2825994014536127e-06, + "loss": 0.5078, + "step": 46280 + }, + { + "epoch": 1.97, + "learning_rate": 1.261222744762719e-06, + "loss": 0.5188, + "step": 46290 + }, + { + "epoch": 1.98, + "learning_rate": 1.2398460880718255e-06, + "loss": 0.5539, + "step": 46300 + }, + { + "epoch": 1.98, + "learning_rate": 1.2184694313809321e-06, + "loss": 0.498, + "step": 46310 + }, + { + "epoch": 1.98, + "learning_rate": 1.1970927746900386e-06, + "loss": 0.5576, + "step": 46320 + }, + { + "epoch": 1.98, + "learning_rate": 1.175716117999145e-06, + "loss": 0.5165, + "step": 46330 + }, + { + "epoch": 1.98, + "learning_rate": 1.1543394613082514e-06, + "loss": 0.524, + "step": 46340 + }, + { + "epoch": 1.98, + "learning_rate": 1.1329628046173578e-06, + "loss": 0.5566, + "step": 46350 + }, + { + "epoch": 1.98, + "learning_rate": 1.1115861479264644e-06, + "loss": 0.5068, + "step": 46360 + }, + { + "epoch": 1.98, + "learning_rate": 1.0902094912355708e-06, + "loss": 0.5629, + "step": 46370 + }, + { + "epoch": 1.98, + "learning_rate": 1.0688328345446773e-06, + "loss": 0.5063, + "step": 46380 + }, + { + "epoch": 1.98, + "learning_rate": 1.0474561778537837e-06, + "loss": 0.5274, + "step": 46390 + }, + { + "epoch": 1.98, + "learning_rate": 1.0260795211628903e-06, + "loss": 0.5525, + "step": 46400 + }, + { + "epoch": 1.98, + "learning_rate": 1.0047028644719967e-06, + "loss": 0.5004, + "step": 46410 + }, + { + "epoch": 1.98, + "learning_rate": 9.833262077811031e-07, + "loss": 0.5528, + "step": 46420 + }, + { + "epoch": 1.98, + "learning_rate": 9.619495510902096e-07, + "loss": 0.5156, + "step": 46430 + }, + { + "epoch": 1.98, + "learning_rate": 9.405728943993161e-07, + "loss": 0.5121, + "step": 46440 + }, + { + "epoch": 1.98, + "learning_rate": 9.191962377084225e-07, + "loss": 0.5511, + "step": 46450 + }, + { + "epoch": 1.98, + "learning_rate": 8.97819581017529e-07, + "loss": 0.5045, + "step": 46460 + }, + { + "epoch": 1.98, + "learning_rate": 8.764429243266354e-07, + "loss": 0.5557, + "step": 46470 + }, + { + "epoch": 1.98, + "learning_rate": 8.550662676357419e-07, + "loss": 0.5069, + "step": 46480 + }, + { + "epoch": 1.98, + "learning_rate": 8.336896109448482e-07, + "loss": 0.5123, + "step": 46490 + }, + { + "epoch": 1.98, + "learning_rate": 8.123129542539547e-07, + "loss": 0.5495, + "step": 46500 + }, + { + "epoch": 1.98, + "learning_rate": 7.909362975630611e-07, + "loss": 0.5034, + "step": 46510 + }, + { + "epoch": 1.98, + "learning_rate": 7.695596408721676e-07, + "loss": 0.5571, + "step": 46520 + }, + { + "epoch": 1.99, + "learning_rate": 7.481829841812741e-07, + "loss": 0.5152, + "step": 46530 + }, + { + "epoch": 1.99, + "learning_rate": 7.268063274903806e-07, + "loss": 0.5227, + "step": 46540 + }, + { + "epoch": 1.99, + "learning_rate": 7.054296707994871e-07, + "loss": 0.5537, + "step": 46550 + }, + { + "epoch": 1.99, + "learning_rate": 6.840530141085934e-07, + "loss": 0.4961, + "step": 46560 + }, + { + "epoch": 1.99, + "learning_rate": 6.626763574176999e-07, + "loss": 0.5548, + "step": 46570 + }, + { + "epoch": 1.99, + "learning_rate": 6.412997007268063e-07, + "loss": 0.5136, + "step": 46580 + }, + { + "epoch": 1.99, + "learning_rate": 6.199230440359128e-07, + "loss": 0.5169, + "step": 46590 + }, + { + "epoch": 1.99, + "learning_rate": 5.985463873450193e-07, + "loss": 0.5556, + "step": 46600 + }, + { + "epoch": 1.99, + "learning_rate": 5.771697306541257e-07, + "loss": 0.4982, + "step": 46610 + }, + { + "epoch": 1.99, + "learning_rate": 5.557930739632322e-07, + "loss": 0.5576, + "step": 46620 + }, + { + "epoch": 1.99, + "learning_rate": 5.344164172723386e-07, + "loss": 0.5068, + "step": 46630 + }, + { + "epoch": 1.99, + "learning_rate": 5.130397605814452e-07, + "loss": 0.5199, + "step": 46640 + }, + { + "epoch": 1.99, + "learning_rate": 4.916631038905516e-07, + "loss": 0.5374, + "step": 46650 + }, + { + "epoch": 1.99, + "learning_rate": 4.7028644719965804e-07, + "loss": 0.4979, + "step": 46660 + }, + { + "epoch": 1.99, + "learning_rate": 4.489097905087645e-07, + "loss": 0.5569, + "step": 46670 + }, + { + "epoch": 1.99, + "learning_rate": 4.2753313381787093e-07, + "loss": 0.5127, + "step": 46680 + }, + { + "epoch": 1.99, + "learning_rate": 4.0615647712697734e-07, + "loss": 0.5196, + "step": 46690 + }, + { + "epoch": 1.99, + "learning_rate": 3.847798204360838e-07, + "loss": 0.5442, + "step": 46700 + }, + { + "epoch": 1.99, + "learning_rate": 3.634031637451903e-07, + "loss": 0.4944, + "step": 46710 + }, + { + "epoch": 1.99, + "learning_rate": 3.420265070542967e-07, + "loss": 0.5522, + "step": 46720 + }, + { + "epoch": 1.99, + "learning_rate": 3.2064985036340317e-07, + "loss": 0.511, + "step": 46730 + }, + { + "epoch": 1.99, + "learning_rate": 2.9927319367250964e-07, + "loss": 0.5214, + "step": 46740 + }, + { + "epoch": 1.99, + "learning_rate": 2.778965369816161e-07, + "loss": 0.5511, + "step": 46750 + }, + { + "epoch": 1.99, + "learning_rate": 2.565198802907226e-07, + "loss": 0.4981, + "step": 46760 + }, + { + "epoch": 2.0, + "learning_rate": 2.3514322359982902e-07, + "loss": 0.566, + "step": 46770 + }, + { + "epoch": 2.0, + "learning_rate": 2.1376656690893546e-07, + "loss": 0.5076, + "step": 46780 + }, + { + "epoch": 2.0, + "learning_rate": 1.923899102180419e-07, + "loss": 0.5226, + "step": 46790 + }, + { + "epoch": 2.0, + "learning_rate": 1.7101325352714835e-07, + "loss": 0.5459, + "step": 46800 + } + ], + "max_steps": 46880, + "num_train_epochs": 2, + "total_flos": 2.979121974892036e+19, + "trial_name": null, + "trial_params": null +}