{ "best_metric": 0.8769951462745667, "best_model_checkpoint": "/content/drive/MyDrive/Data Mining and Analysis/coursework/bert_fine_tune/logs/report_1/checkpoint-70000", "epoch": 7.860948554458905, "eval_steps": 5000, "global_step": 90000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9995632806358636e-05, "loss": 1.0403, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.999126561271727e-05, "loss": 0.9465, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.998689841907591e-05, "loss": 0.9756, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.9982531225434535e-05, "loss": 0.9459, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.9978164031793175e-05, "loss": 1.0372, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.99737968381518e-05, "loss": 0.9496, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.996942964451044e-05, "loss": 0.9111, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.996506245086907e-05, "loss": 0.9431, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.996069525722771e-05, "loss": 0.9688, "step": 90 }, { "epoch": 0.01, "learning_rate": 4.995632806358634e-05, "loss": 0.9417, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.9951960869944974e-05, "loss": 0.9999, "step": 110 }, { "epoch": 0.01, "learning_rate": 4.9947593676303614e-05, "loss": 1.0532, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.994322648266224e-05, "loss": 0.8558, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.993885928902088e-05, "loss": 0.9832, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.993449209537951e-05, "loss": 0.9702, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.993012490173815e-05, "loss": 0.9343, "step": 160 }, { "epoch": 0.01, "learning_rate": 4.992575770809678e-05, "loss": 1.0262, "step": 170 }, { "epoch": 0.02, "learning_rate": 4.9921390514455414e-05, "loss": 0.8871, "step": 180 }, { "epoch": 0.02, "learning_rate": 4.991702332081405e-05, "loss": 1.0159, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.991265612717268e-05, "loss": 0.9799, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.990828893353131e-05, "loss": 1.0253, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.9903921739889946e-05, "loss": 0.9639, "step": 220 }, { "epoch": 0.02, "learning_rate": 4.9899554546248586e-05, "loss": 0.9653, "step": 230 }, { "epoch": 0.02, "learning_rate": 4.989518735260721e-05, "loss": 0.9899, "step": 240 }, { "epoch": 0.02, "learning_rate": 4.989082015896585e-05, "loss": 1.0361, "step": 250 }, { "epoch": 0.02, "learning_rate": 4.9886452965324486e-05, "loss": 0.9525, "step": 260 }, { "epoch": 0.02, "learning_rate": 4.988208577168312e-05, "loss": 1.0019, "step": 270 }, { "epoch": 0.02, "learning_rate": 4.987771857804175e-05, "loss": 0.9388, "step": 280 }, { "epoch": 0.03, "learning_rate": 4.9873351384400386e-05, "loss": 0.9571, "step": 290 }, { "epoch": 0.03, "learning_rate": 4.986898419075902e-05, "loss": 0.9769, "step": 300 }, { "epoch": 0.03, "learning_rate": 4.986461699711765e-05, "loss": 0.8649, "step": 310 }, { "epoch": 0.03, "learning_rate": 4.986024980347629e-05, "loss": 0.8633, "step": 320 }, { "epoch": 0.03, "learning_rate": 4.9855882609834925e-05, "loss": 1.0568, "step": 330 }, { "epoch": 0.03, "learning_rate": 4.985151541619356e-05, "loss": 0.9646, "step": 340 }, { "epoch": 0.03, "learning_rate": 4.984714822255219e-05, "loss": 0.899, "step": 350 }, { "epoch": 0.03, "learning_rate": 4.9842781028910825e-05, "loss": 0.8334, "step": 360 }, { "epoch": 0.03, "learning_rate": 4.983841383526946e-05, "loss": 0.9937, "step": 370 }, { "epoch": 0.03, "learning_rate": 4.983404664162809e-05, "loss": 0.8855, "step": 380 }, { "epoch": 0.03, "learning_rate": 4.9829679447986724e-05, "loss": 0.8992, "step": 390 }, { "epoch": 0.03, "learning_rate": 4.982531225434536e-05, "loss": 1.104, "step": 400 }, { "epoch": 0.04, "learning_rate": 4.982094506070399e-05, "loss": 0.9004, "step": 410 }, { "epoch": 0.04, "learning_rate": 4.981657786706263e-05, "loss": 0.9589, "step": 420 }, { "epoch": 0.04, "learning_rate": 4.9812210673421264e-05, "loss": 0.9941, "step": 430 }, { "epoch": 0.04, "learning_rate": 4.98078434797799e-05, "loss": 0.8478, "step": 440 }, { "epoch": 0.04, "learning_rate": 4.980347628613853e-05, "loss": 0.9095, "step": 450 }, { "epoch": 0.04, "learning_rate": 4.9799109092497164e-05, "loss": 0.9783, "step": 460 }, { "epoch": 0.04, "learning_rate": 4.97947418988558e-05, "loss": 1.0161, "step": 470 }, { "epoch": 0.04, "learning_rate": 4.979037470521443e-05, "loss": 0.912, "step": 480 }, { "epoch": 0.04, "learning_rate": 4.978600751157306e-05, "loss": 0.9125, "step": 490 }, { "epoch": 0.04, "learning_rate": 4.9781640317931696e-05, "loss": 1.0088, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.9777273124290336e-05, "loss": 0.9104, "step": 510 }, { "epoch": 0.05, "learning_rate": 4.977290593064897e-05, "loss": 1.0435, "step": 520 }, { "epoch": 0.05, "learning_rate": 4.97685387370076e-05, "loss": 0.8887, "step": 530 }, { "epoch": 0.05, "learning_rate": 4.9764171543366236e-05, "loss": 0.8453, "step": 540 }, { "epoch": 0.05, "learning_rate": 4.975980434972487e-05, "loss": 0.9301, "step": 550 }, { "epoch": 0.05, "learning_rate": 4.97554371560835e-05, "loss": 0.8271, "step": 560 }, { "epoch": 0.05, "learning_rate": 4.9751069962442136e-05, "loss": 1.0066, "step": 570 }, { "epoch": 0.05, "learning_rate": 4.9746702768800776e-05, "loss": 1.0448, "step": 580 }, { "epoch": 0.05, "learning_rate": 4.97423355751594e-05, "loss": 0.9184, "step": 590 }, { "epoch": 0.05, "learning_rate": 4.973796838151804e-05, "loss": 0.9653, "step": 600 }, { "epoch": 0.05, "learning_rate": 4.973360118787667e-05, "loss": 0.9661, "step": 610 }, { "epoch": 0.05, "learning_rate": 4.972923399423531e-05, "loss": 1.0116, "step": 620 }, { "epoch": 0.06, "learning_rate": 4.972486680059394e-05, "loss": 0.8806, "step": 630 }, { "epoch": 0.06, "learning_rate": 4.9720499606952575e-05, "loss": 0.9603, "step": 640 }, { "epoch": 0.06, "learning_rate": 4.971613241331121e-05, "loss": 0.9405, "step": 650 }, { "epoch": 0.06, "learning_rate": 4.971176521966984e-05, "loss": 1.0418, "step": 660 }, { "epoch": 0.06, "learning_rate": 4.970739802602848e-05, "loss": 0.8486, "step": 670 }, { "epoch": 0.06, "learning_rate": 4.970303083238711e-05, "loss": 0.9366, "step": 680 }, { "epoch": 0.06, "learning_rate": 4.969866363874575e-05, "loss": 0.9031, "step": 690 }, { "epoch": 0.06, "learning_rate": 4.9694296445104374e-05, "loss": 1.0206, "step": 700 }, { "epoch": 0.06, "learning_rate": 4.9689929251463014e-05, "loss": 1.0355, "step": 710 }, { "epoch": 0.06, "learning_rate": 4.968556205782165e-05, "loss": 0.9031, "step": 720 }, { "epoch": 0.06, "learning_rate": 4.968119486418028e-05, "loss": 1.0127, "step": 730 }, { "epoch": 0.06, "learning_rate": 4.9676827670538914e-05, "loss": 1.0261, "step": 740 }, { "epoch": 0.07, "learning_rate": 4.967246047689755e-05, "loss": 0.8633, "step": 750 }, { "epoch": 0.07, "learning_rate": 4.966809328325619e-05, "loss": 1.0313, "step": 760 }, { "epoch": 0.07, "learning_rate": 4.966372608961481e-05, "loss": 0.9448, "step": 770 }, { "epoch": 0.07, "learning_rate": 4.965935889597345e-05, "loss": 0.8489, "step": 780 }, { "epoch": 0.07, "learning_rate": 4.965499170233208e-05, "loss": 0.9398, "step": 790 }, { "epoch": 0.07, "learning_rate": 4.965062450869072e-05, "loss": 0.901, "step": 800 }, { "epoch": 0.07, "learning_rate": 4.9646257315049346e-05, "loss": 0.938, "step": 810 }, { "epoch": 0.07, "learning_rate": 4.9641890121407986e-05, "loss": 0.9577, "step": 820 }, { "epoch": 0.07, "learning_rate": 4.963752292776662e-05, "loss": 0.8499, "step": 830 }, { "epoch": 0.07, "learning_rate": 4.963315573412525e-05, "loss": 0.9224, "step": 840 }, { "epoch": 0.07, "learning_rate": 4.962878854048389e-05, "loss": 0.8919, "step": 850 }, { "epoch": 0.08, "learning_rate": 4.962442134684252e-05, "loss": 0.9534, "step": 860 }, { "epoch": 0.08, "learning_rate": 4.962005415320116e-05, "loss": 0.9699, "step": 870 }, { "epoch": 0.08, "learning_rate": 4.9615686959559785e-05, "loss": 1.0054, "step": 880 }, { "epoch": 0.08, "learning_rate": 4.9611319765918425e-05, "loss": 0.9884, "step": 890 }, { "epoch": 0.08, "learning_rate": 4.960695257227705e-05, "loss": 1.0228, "step": 900 }, { "epoch": 0.08, "learning_rate": 4.960258537863569e-05, "loss": 1.0274, "step": 910 }, { "epoch": 0.08, "learning_rate": 4.9598218184994325e-05, "loss": 0.9991, "step": 920 }, { "epoch": 0.08, "learning_rate": 4.959385099135296e-05, "loss": 0.9862, "step": 930 }, { "epoch": 0.08, "learning_rate": 4.958948379771159e-05, "loss": 0.8869, "step": 940 }, { "epoch": 0.08, "learning_rate": 4.9585116604070224e-05, "loss": 0.9661, "step": 950 }, { "epoch": 0.08, "learning_rate": 4.9580749410428864e-05, "loss": 0.9623, "step": 960 }, { "epoch": 0.08, "learning_rate": 4.957638221678749e-05, "loss": 0.9473, "step": 970 }, { "epoch": 0.09, "learning_rate": 4.957201502314613e-05, "loss": 0.9598, "step": 980 }, { "epoch": 0.09, "learning_rate": 4.9567647829504764e-05, "loss": 0.9164, "step": 990 }, { "epoch": 0.09, "learning_rate": 4.95632806358634e-05, "loss": 0.9441, "step": 1000 }, { "epoch": 0.09, "learning_rate": 4.955891344222203e-05, "loss": 1.0582, "step": 1010 }, { "epoch": 0.09, "learning_rate": 4.9554546248580664e-05, "loss": 0.9841, "step": 1020 }, { "epoch": 0.09, "learning_rate": 4.95501790549393e-05, "loss": 0.8739, "step": 1030 }, { "epoch": 0.09, "learning_rate": 4.954581186129793e-05, "loss": 0.9216, "step": 1040 }, { "epoch": 0.09, "learning_rate": 4.954144466765657e-05, "loss": 0.9129, "step": 1050 }, { "epoch": 0.09, "learning_rate": 4.9537077474015196e-05, "loss": 1.0256, "step": 1060 }, { "epoch": 0.09, "learning_rate": 4.9532710280373836e-05, "loss": 0.9285, "step": 1070 }, { "epoch": 0.09, "learning_rate": 4.952834308673247e-05, "loss": 0.9412, "step": 1080 }, { "epoch": 0.1, "learning_rate": 4.95239758930911e-05, "loss": 0.9598, "step": 1090 }, { "epoch": 0.1, "learning_rate": 4.9519608699449736e-05, "loss": 0.9926, "step": 1100 }, { "epoch": 0.1, "learning_rate": 4.951524150580837e-05, "loss": 0.981, "step": 1110 }, { "epoch": 0.1, "learning_rate": 4.9510874312167e-05, "loss": 0.9916, "step": 1120 }, { "epoch": 0.1, "learning_rate": 4.9506507118525636e-05, "loss": 0.9936, "step": 1130 }, { "epoch": 0.1, "learning_rate": 4.950213992488427e-05, "loss": 0.9909, "step": 1140 }, { "epoch": 0.1, "learning_rate": 4.94977727312429e-05, "loss": 0.8959, "step": 1150 }, { "epoch": 0.1, "learning_rate": 4.949340553760154e-05, "loss": 0.9105, "step": 1160 }, { "epoch": 0.1, "learning_rate": 4.9489038343960175e-05, "loss": 1.0268, "step": 1170 }, { "epoch": 0.1, "learning_rate": 4.948467115031881e-05, "loss": 0.9223, "step": 1180 }, { "epoch": 0.1, "learning_rate": 4.948030395667744e-05, "loss": 0.8919, "step": 1190 }, { "epoch": 0.1, "learning_rate": 4.9475936763036075e-05, "loss": 1.0177, "step": 1200 }, { "epoch": 0.11, "learning_rate": 4.947156956939471e-05, "loss": 0.9374, "step": 1210 }, { "epoch": 0.11, "learning_rate": 4.946720237575334e-05, "loss": 0.9679, "step": 1220 }, { "epoch": 0.11, "learning_rate": 4.9462835182111974e-05, "loss": 0.8975, "step": 1230 }, { "epoch": 0.11, "learning_rate": 4.9458467988470614e-05, "loss": 0.9361, "step": 1240 }, { "epoch": 0.11, "learning_rate": 4.945410079482925e-05, "loss": 1.0163, "step": 1250 }, { "epoch": 0.11, "learning_rate": 4.944973360118788e-05, "loss": 1.0202, "step": 1260 }, { "epoch": 0.11, "learning_rate": 4.9445366407546514e-05, "loss": 1.0489, "step": 1270 }, { "epoch": 0.11, "learning_rate": 4.944099921390515e-05, "loss": 0.918, "step": 1280 }, { "epoch": 0.11, "learning_rate": 4.943663202026378e-05, "loss": 0.9559, "step": 1290 }, { "epoch": 0.11, "learning_rate": 4.9432264826622414e-05, "loss": 1.0352, "step": 1300 }, { "epoch": 0.11, "learning_rate": 4.942789763298105e-05, "loss": 0.9161, "step": 1310 }, { "epoch": 0.12, "learning_rate": 4.942353043933968e-05, "loss": 0.9595, "step": 1320 }, { "epoch": 0.12, "learning_rate": 4.941916324569832e-05, "loss": 0.9689, "step": 1330 }, { "epoch": 0.12, "learning_rate": 4.9414796052056946e-05, "loss": 0.9298, "step": 1340 }, { "epoch": 0.12, "learning_rate": 4.9410428858415586e-05, "loss": 0.9834, "step": 1350 }, { "epoch": 0.12, "learning_rate": 4.940606166477422e-05, "loss": 1.0293, "step": 1360 }, { "epoch": 0.12, "learning_rate": 4.940169447113285e-05, "loss": 0.9291, "step": 1370 }, { "epoch": 0.12, "learning_rate": 4.9397327277491486e-05, "loss": 0.9656, "step": 1380 }, { "epoch": 0.12, "learning_rate": 4.939296008385012e-05, "loss": 0.9721, "step": 1390 }, { "epoch": 0.12, "learning_rate": 4.938859289020876e-05, "loss": 0.8988, "step": 1400 }, { "epoch": 0.12, "learning_rate": 4.9384225696567386e-05, "loss": 0.8917, "step": 1410 }, { "epoch": 0.12, "learning_rate": 4.9379858502926026e-05, "loss": 0.9286, "step": 1420 }, { "epoch": 0.12, "learning_rate": 4.937549130928465e-05, "loss": 0.9466, "step": 1430 }, { "epoch": 0.13, "learning_rate": 4.937112411564329e-05, "loss": 0.9644, "step": 1440 }, { "epoch": 0.13, "learning_rate": 4.9366756922001925e-05, "loss": 1.015, "step": 1450 }, { "epoch": 0.13, "learning_rate": 4.936238972836056e-05, "loss": 0.9779, "step": 1460 }, { "epoch": 0.13, "learning_rate": 4.935802253471919e-05, "loss": 0.855, "step": 1470 }, { "epoch": 0.13, "learning_rate": 4.9353655341077825e-05, "loss": 0.9618, "step": 1480 }, { "epoch": 0.13, "learning_rate": 4.9349288147436465e-05, "loss": 0.9882, "step": 1490 }, { "epoch": 0.13, "learning_rate": 4.934492095379509e-05, "loss": 0.9304, "step": 1500 }, { "epoch": 0.13, "learning_rate": 4.934055376015373e-05, "loss": 1.0002, "step": 1510 }, { "epoch": 0.13, "learning_rate": 4.933618656651236e-05, "loss": 0.8864, "step": 1520 }, { "epoch": 0.13, "learning_rate": 4.9331819372871e-05, "loss": 0.9655, "step": 1530 }, { "epoch": 0.13, "learning_rate": 4.9327452179229624e-05, "loss": 0.9529, "step": 1540 }, { "epoch": 0.14, "learning_rate": 4.9323084985588264e-05, "loss": 0.8572, "step": 1550 }, { "epoch": 0.14, "learning_rate": 4.93187177919469e-05, "loss": 0.918, "step": 1560 }, { "epoch": 0.14, "learning_rate": 4.931435059830553e-05, "loss": 1.0434, "step": 1570 }, { "epoch": 0.14, "learning_rate": 4.930998340466417e-05, "loss": 0.8926, "step": 1580 }, { "epoch": 0.14, "learning_rate": 4.93056162110228e-05, "loss": 1.0405, "step": 1590 }, { "epoch": 0.14, "learning_rate": 4.930124901738144e-05, "loss": 0.9279, "step": 1600 }, { "epoch": 0.14, "learning_rate": 4.929688182374006e-05, "loss": 0.948, "step": 1610 }, { "epoch": 0.14, "learning_rate": 4.92925146300987e-05, "loss": 0.8886, "step": 1620 }, { "epoch": 0.14, "learning_rate": 4.928814743645733e-05, "loss": 1.016, "step": 1630 }, { "epoch": 0.14, "learning_rate": 4.928378024281597e-05, "loss": 0.9776, "step": 1640 }, { "epoch": 0.14, "learning_rate": 4.92794130491746e-05, "loss": 0.9765, "step": 1650 }, { "epoch": 0.14, "learning_rate": 4.9275045855533236e-05, "loss": 1.0266, "step": 1660 }, { "epoch": 0.15, "learning_rate": 4.927067866189187e-05, "loss": 0.8994, "step": 1670 }, { "epoch": 0.15, "learning_rate": 4.92663114682505e-05, "loss": 0.9955, "step": 1680 }, { "epoch": 0.15, "learning_rate": 4.926194427460914e-05, "loss": 0.8983, "step": 1690 }, { "epoch": 0.15, "learning_rate": 4.925757708096777e-05, "loss": 0.986, "step": 1700 }, { "epoch": 0.15, "learning_rate": 4.925320988732641e-05, "loss": 0.959, "step": 1710 }, { "epoch": 0.15, "learning_rate": 4.9248842693685035e-05, "loss": 1.0302, "step": 1720 }, { "epoch": 0.15, "learning_rate": 4.9244475500043675e-05, "loss": 0.9265, "step": 1730 }, { "epoch": 0.15, "learning_rate": 4.924010830640231e-05, "loss": 0.9686, "step": 1740 }, { "epoch": 0.15, "learning_rate": 4.923574111276094e-05, "loss": 0.9412, "step": 1750 }, { "epoch": 0.15, "learning_rate": 4.9231373919119575e-05, "loss": 0.9361, "step": 1760 }, { "epoch": 0.15, "learning_rate": 4.922700672547821e-05, "loss": 1.0435, "step": 1770 }, { "epoch": 0.16, "learning_rate": 4.922263953183685e-05, "loss": 1.0142, "step": 1780 }, { "epoch": 0.16, "learning_rate": 4.9218272338195474e-05, "loss": 0.9519, "step": 1790 }, { "epoch": 0.16, "learning_rate": 4.9213905144554114e-05, "loss": 0.9459, "step": 1800 }, { "epoch": 0.16, "learning_rate": 4.920953795091275e-05, "loss": 0.9458, "step": 1810 }, { "epoch": 0.16, "learning_rate": 4.920517075727138e-05, "loss": 1.0128, "step": 1820 }, { "epoch": 0.16, "learning_rate": 4.9200803563630014e-05, "loss": 0.9978, "step": 1830 }, { "epoch": 0.16, "learning_rate": 4.919643636998865e-05, "loss": 1.0136, "step": 1840 }, { "epoch": 0.16, "learning_rate": 4.919206917634728e-05, "loss": 0.9648, "step": 1850 }, { "epoch": 0.16, "learning_rate": 4.9187701982705914e-05, "loss": 0.9627, "step": 1860 }, { "epoch": 0.16, "learning_rate": 4.918333478906455e-05, "loss": 0.9315, "step": 1870 }, { "epoch": 0.16, "learning_rate": 4.917896759542318e-05, "loss": 0.9317, "step": 1880 }, { "epoch": 0.17, "learning_rate": 4.917460040178182e-05, "loss": 0.9756, "step": 1890 }, { "epoch": 0.17, "learning_rate": 4.917023320814045e-05, "loss": 0.8983, "step": 1900 }, { "epoch": 0.17, "learning_rate": 4.9165866014499086e-05, "loss": 0.9227, "step": 1910 }, { "epoch": 0.17, "learning_rate": 4.916149882085772e-05, "loss": 1.0251, "step": 1920 }, { "epoch": 0.17, "learning_rate": 4.915713162721635e-05, "loss": 0.9369, "step": 1930 }, { "epoch": 0.17, "learning_rate": 4.9152764433574986e-05, "loss": 1.0062, "step": 1940 }, { "epoch": 0.17, "learning_rate": 4.914839723993362e-05, "loss": 1.0771, "step": 1950 }, { "epoch": 0.17, "learning_rate": 4.914403004629225e-05, "loss": 0.9042, "step": 1960 }, { "epoch": 0.17, "learning_rate": 4.9139662852650886e-05, "loss": 0.9503, "step": 1970 }, { "epoch": 0.17, "learning_rate": 4.9135295659009526e-05, "loss": 0.914, "step": 1980 }, { "epoch": 0.17, "learning_rate": 4.913092846536816e-05, "loss": 0.906, "step": 1990 }, { "epoch": 0.17, "learning_rate": 4.912656127172679e-05, "loss": 0.9457, "step": 2000 }, { "epoch": 0.18, "learning_rate": 4.9122194078085425e-05, "loss": 0.8958, "step": 2010 }, { "epoch": 0.18, "learning_rate": 4.911782688444406e-05, "loss": 0.952, "step": 2020 }, { "epoch": 0.18, "learning_rate": 4.911345969080269e-05, "loss": 0.9643, "step": 2030 }, { "epoch": 0.18, "learning_rate": 4.9109092497161325e-05, "loss": 0.8752, "step": 2040 }, { "epoch": 0.18, "learning_rate": 4.910472530351996e-05, "loss": 0.9656, "step": 2050 }, { "epoch": 0.18, "learning_rate": 4.91003581098786e-05, "loss": 0.9661, "step": 2060 }, { "epoch": 0.18, "learning_rate": 4.9095990916237224e-05, "loss": 1.0128, "step": 2070 }, { "epoch": 0.18, "learning_rate": 4.9091623722595864e-05, "loss": 0.9888, "step": 2080 }, { "epoch": 0.18, "learning_rate": 4.90872565289545e-05, "loss": 0.96, "step": 2090 }, { "epoch": 0.18, "learning_rate": 4.908288933531313e-05, "loss": 0.9634, "step": 2100 }, { "epoch": 0.18, "learning_rate": 4.9078522141671764e-05, "loss": 0.9381, "step": 2110 }, { "epoch": 0.19, "learning_rate": 4.90741549480304e-05, "loss": 0.8479, "step": 2120 }, { "epoch": 0.19, "learning_rate": 4.906978775438903e-05, "loss": 0.9178, "step": 2130 }, { "epoch": 0.19, "learning_rate": 4.9065420560747664e-05, "loss": 0.8983, "step": 2140 }, { "epoch": 0.19, "learning_rate": 4.9061053367106304e-05, "loss": 0.8988, "step": 2150 }, { "epoch": 0.19, "learning_rate": 4.905668617346493e-05, "loss": 0.812, "step": 2160 }, { "epoch": 0.19, "learning_rate": 4.905231897982357e-05, "loss": 0.949, "step": 2170 }, { "epoch": 0.19, "learning_rate": 4.90479517861822e-05, "loss": 0.9629, "step": 2180 }, { "epoch": 0.19, "learning_rate": 4.9043584592540836e-05, "loss": 1.0334, "step": 2190 }, { "epoch": 0.19, "learning_rate": 4.903921739889947e-05, "loss": 1.0536, "step": 2200 }, { "epoch": 0.19, "learning_rate": 4.90348502052581e-05, "loss": 0.9478, "step": 2210 }, { "epoch": 0.19, "learning_rate": 4.903048301161674e-05, "loss": 0.9571, "step": 2220 }, { "epoch": 0.19, "learning_rate": 4.902611581797537e-05, "loss": 0.9597, "step": 2230 }, { "epoch": 0.2, "learning_rate": 4.902174862433401e-05, "loss": 0.791, "step": 2240 }, { "epoch": 0.2, "learning_rate": 4.9017381430692636e-05, "loss": 0.9065, "step": 2250 }, { "epoch": 0.2, "learning_rate": 4.9013014237051276e-05, "loss": 1.04, "step": 2260 }, { "epoch": 0.2, "learning_rate": 4.90086470434099e-05, "loss": 0.9144, "step": 2270 }, { "epoch": 0.2, "learning_rate": 4.900427984976854e-05, "loss": 0.8441, "step": 2280 }, { "epoch": 0.2, "learning_rate": 4.8999912656127175e-05, "loss": 0.996, "step": 2290 }, { "epoch": 0.2, "learning_rate": 4.899554546248581e-05, "loss": 0.8924, "step": 2300 }, { "epoch": 0.2, "learning_rate": 4.899117826884445e-05, "loss": 0.9115, "step": 2310 }, { "epoch": 0.2, "learning_rate": 4.8986811075203075e-05, "loss": 0.9701, "step": 2320 }, { "epoch": 0.2, "learning_rate": 4.8982443881561715e-05, "loss": 0.9551, "step": 2330 }, { "epoch": 0.2, "learning_rate": 4.897807668792034e-05, "loss": 0.9846, "step": 2340 }, { "epoch": 0.21, "learning_rate": 4.897370949427898e-05, "loss": 0.9265, "step": 2350 }, { "epoch": 0.21, "learning_rate": 4.896934230063761e-05, "loss": 0.9866, "step": 2360 }, { "epoch": 0.21, "learning_rate": 4.896497510699625e-05, "loss": 1.0033, "step": 2370 }, { "epoch": 0.21, "learning_rate": 4.896060791335488e-05, "loss": 0.8552, "step": 2380 }, { "epoch": 0.21, "learning_rate": 4.8956240719713514e-05, "loss": 0.8984, "step": 2390 }, { "epoch": 0.21, "learning_rate": 4.895187352607215e-05, "loss": 1.0317, "step": 2400 }, { "epoch": 0.21, "learning_rate": 4.894750633243078e-05, "loss": 0.8503, "step": 2410 }, { "epoch": 0.21, "learning_rate": 4.894313913878942e-05, "loss": 1.0195, "step": 2420 }, { "epoch": 0.21, "learning_rate": 4.893877194514805e-05, "loss": 1.0256, "step": 2430 }, { "epoch": 0.21, "learning_rate": 4.893440475150669e-05, "loss": 1.0027, "step": 2440 }, { "epoch": 0.21, "learning_rate": 4.893003755786531e-05, "loss": 0.9532, "step": 2450 }, { "epoch": 0.21, "learning_rate": 4.892567036422395e-05, "loss": 1.0081, "step": 2460 }, { "epoch": 0.22, "learning_rate": 4.8921303170582586e-05, "loss": 0.9046, "step": 2470 }, { "epoch": 0.22, "learning_rate": 4.891693597694122e-05, "loss": 0.9324, "step": 2480 }, { "epoch": 0.22, "learning_rate": 4.891256878329985e-05, "loss": 0.9082, "step": 2490 }, { "epoch": 0.22, "learning_rate": 4.8908201589658486e-05, "loss": 0.9223, "step": 2500 }, { "epoch": 0.22, "learning_rate": 4.8903834396017126e-05, "loss": 0.9729, "step": 2510 }, { "epoch": 0.22, "learning_rate": 4.889946720237575e-05, "loss": 0.9788, "step": 2520 }, { "epoch": 0.22, "learning_rate": 4.889510000873439e-05, "loss": 0.975, "step": 2530 }, { "epoch": 0.22, "learning_rate": 4.889073281509302e-05, "loss": 1.0072, "step": 2540 }, { "epoch": 0.22, "learning_rate": 4.888636562145166e-05, "loss": 1.0536, "step": 2550 }, { "epoch": 0.22, "learning_rate": 4.888199842781029e-05, "loss": 0.9831, "step": 2560 }, { "epoch": 0.22, "learning_rate": 4.8877631234168925e-05, "loss": 0.9724, "step": 2570 }, { "epoch": 0.23, "learning_rate": 4.887326404052756e-05, "loss": 0.962, "step": 2580 }, { "epoch": 0.23, "learning_rate": 4.886889684688619e-05, "loss": 0.9508, "step": 2590 }, { "epoch": 0.23, "learning_rate": 4.8864529653244825e-05, "loss": 0.9489, "step": 2600 }, { "epoch": 0.23, "learning_rate": 4.886016245960346e-05, "loss": 1.0164, "step": 2610 }, { "epoch": 0.23, "learning_rate": 4.88557952659621e-05, "loss": 0.9989, "step": 2620 }, { "epoch": 0.23, "learning_rate": 4.8851428072320725e-05, "loss": 0.8959, "step": 2630 }, { "epoch": 0.23, "learning_rate": 4.8847060878679364e-05, "loss": 0.9129, "step": 2640 }, { "epoch": 0.23, "learning_rate": 4.8842693685038e-05, "loss": 0.9048, "step": 2650 }, { "epoch": 0.23, "learning_rate": 4.883832649139663e-05, "loss": 0.9269, "step": 2660 }, { "epoch": 0.23, "learning_rate": 4.8833959297755264e-05, "loss": 0.9609, "step": 2670 }, { "epoch": 0.23, "learning_rate": 4.88295921041139e-05, "loss": 1.0289, "step": 2680 }, { "epoch": 0.23, "learning_rate": 4.882522491047253e-05, "loss": 0.9198, "step": 2690 }, { "epoch": 0.24, "learning_rate": 4.8820857716831164e-05, "loss": 0.9428, "step": 2700 }, { "epoch": 0.24, "learning_rate": 4.8816490523189804e-05, "loss": 0.8933, "step": 2710 }, { "epoch": 0.24, "learning_rate": 4.881212332954844e-05, "loss": 0.9559, "step": 2720 }, { "epoch": 0.24, "learning_rate": 4.880775613590707e-05, "loss": 0.9619, "step": 2730 }, { "epoch": 0.24, "learning_rate": 4.88033889422657e-05, "loss": 1.0151, "step": 2740 }, { "epoch": 0.24, "learning_rate": 4.8799021748624337e-05, "loss": 0.9329, "step": 2750 }, { "epoch": 0.24, "learning_rate": 4.879465455498297e-05, "loss": 0.9951, "step": 2760 }, { "epoch": 0.24, "learning_rate": 4.87902873613416e-05, "loss": 0.9182, "step": 2770 }, { "epoch": 0.24, "learning_rate": 4.8785920167700236e-05, "loss": 0.8708, "step": 2780 }, { "epoch": 0.24, "learning_rate": 4.878155297405887e-05, "loss": 0.9361, "step": 2790 }, { "epoch": 0.24, "learning_rate": 4.87771857804175e-05, "loss": 1.0204, "step": 2800 }, { "epoch": 0.25, "learning_rate": 4.877281858677614e-05, "loss": 0.9972, "step": 2810 }, { "epoch": 0.25, "learning_rate": 4.8768451393134776e-05, "loss": 0.958, "step": 2820 }, { "epoch": 0.25, "learning_rate": 4.876408419949341e-05, "loss": 0.9688, "step": 2830 }, { "epoch": 0.25, "learning_rate": 4.875971700585204e-05, "loss": 1.0413, "step": 2840 }, { "epoch": 0.25, "learning_rate": 4.8755349812210675e-05, "loss": 0.9984, "step": 2850 }, { "epoch": 0.25, "learning_rate": 4.875098261856931e-05, "loss": 1.0529, "step": 2860 }, { "epoch": 0.25, "learning_rate": 4.874661542492794e-05, "loss": 0.9634, "step": 2870 }, { "epoch": 0.25, "learning_rate": 4.874224823128658e-05, "loss": 0.9478, "step": 2880 }, { "epoch": 0.25, "learning_rate": 4.873788103764521e-05, "loss": 0.8547, "step": 2890 }, { "epoch": 0.25, "learning_rate": 4.873351384400385e-05, "loss": 0.9699, "step": 2900 }, { "epoch": 0.25, "learning_rate": 4.872914665036248e-05, "loss": 0.8683, "step": 2910 }, { "epoch": 0.26, "learning_rate": 4.8724779456721115e-05, "loss": 0.911, "step": 2920 }, { "epoch": 0.26, "learning_rate": 4.872041226307975e-05, "loss": 0.8428, "step": 2930 }, { "epoch": 0.26, "learning_rate": 4.871604506943838e-05, "loss": 0.842, "step": 2940 }, { "epoch": 0.26, "learning_rate": 4.8711677875797014e-05, "loss": 0.8891, "step": 2950 }, { "epoch": 0.26, "learning_rate": 4.870731068215565e-05, "loss": 0.9217, "step": 2960 }, { "epoch": 0.26, "learning_rate": 4.870294348851429e-05, "loss": 0.9264, "step": 2970 }, { "epoch": 0.26, "learning_rate": 4.8698576294872914e-05, "loss": 0.9735, "step": 2980 }, { "epoch": 0.26, "learning_rate": 4.8694209101231554e-05, "loss": 0.9314, "step": 2990 }, { "epoch": 0.26, "learning_rate": 4.868984190759018e-05, "loss": 0.9456, "step": 3000 }, { "epoch": 0.26, "learning_rate": 4.868547471394882e-05, "loss": 1.0547, "step": 3010 }, { "epoch": 0.26, "learning_rate": 4.868110752030745e-05, "loss": 0.934, "step": 3020 }, { "epoch": 0.26, "learning_rate": 4.8676740326666087e-05, "loss": 0.8343, "step": 3030 }, { "epoch": 0.27, "learning_rate": 4.867237313302472e-05, "loss": 1.0218, "step": 3040 }, { "epoch": 0.27, "learning_rate": 4.866800593938335e-05, "loss": 1.0145, "step": 3050 }, { "epoch": 0.27, "learning_rate": 4.866363874574199e-05, "loss": 0.8718, "step": 3060 }, { "epoch": 0.27, "learning_rate": 4.865927155210062e-05, "loss": 0.901, "step": 3070 }, { "epoch": 0.27, "learning_rate": 4.865490435845926e-05, "loss": 0.8845, "step": 3080 }, { "epoch": 0.27, "learning_rate": 4.8650537164817886e-05, "loss": 0.961, "step": 3090 }, { "epoch": 0.27, "learning_rate": 4.8646169971176526e-05, "loss": 0.9009, "step": 3100 }, { "epoch": 0.27, "learning_rate": 4.864180277753516e-05, "loss": 0.9523, "step": 3110 }, { "epoch": 0.27, "learning_rate": 4.863743558389379e-05, "loss": 0.8963, "step": 3120 }, { "epoch": 0.27, "learning_rate": 4.8633068390252425e-05, "loss": 0.8846, "step": 3130 }, { "epoch": 0.27, "learning_rate": 4.862870119661106e-05, "loss": 0.9082, "step": 3140 }, { "epoch": 0.28, "learning_rate": 4.86243340029697e-05, "loss": 0.9656, "step": 3150 }, { "epoch": 0.28, "learning_rate": 4.8619966809328325e-05, "loss": 0.8836, "step": 3160 }, { "epoch": 0.28, "learning_rate": 4.8615599615686965e-05, "loss": 1.0194, "step": 3170 }, { "epoch": 0.28, "learning_rate": 4.861123242204559e-05, "loss": 1.0275, "step": 3180 }, { "epoch": 0.28, "learning_rate": 4.860686522840423e-05, "loss": 0.8716, "step": 3190 }, { "epoch": 0.28, "learning_rate": 4.860249803476286e-05, "loss": 0.9184, "step": 3200 }, { "epoch": 0.28, "learning_rate": 4.85981308411215e-05, "loss": 1.0129, "step": 3210 }, { "epoch": 0.28, "learning_rate": 4.859376364748013e-05, "loss": 0.8573, "step": 3220 }, { "epoch": 0.28, "learning_rate": 4.8589396453838764e-05, "loss": 0.9787, "step": 3230 }, { "epoch": 0.28, "learning_rate": 4.8585029260197404e-05, "loss": 1.0416, "step": 3240 }, { "epoch": 0.28, "learning_rate": 4.858066206655603e-05, "loss": 0.9824, "step": 3250 }, { "epoch": 0.28, "learning_rate": 4.857629487291467e-05, "loss": 0.9861, "step": 3260 }, { "epoch": 0.29, "learning_rate": 4.85719276792733e-05, "loss": 0.9741, "step": 3270 }, { "epoch": 0.29, "learning_rate": 4.856756048563194e-05, "loss": 0.8959, "step": 3280 }, { "epoch": 0.29, "learning_rate": 4.856319329199057e-05, "loss": 1.037, "step": 3290 }, { "epoch": 0.29, "learning_rate": 4.85588260983492e-05, "loss": 0.9341, "step": 3300 }, { "epoch": 0.29, "learning_rate": 4.8554458904707837e-05, "loss": 0.9072, "step": 3310 }, { "epoch": 0.29, "learning_rate": 4.855009171106647e-05, "loss": 0.9548, "step": 3320 }, { "epoch": 0.29, "learning_rate": 4.85457245174251e-05, "loss": 0.9305, "step": 3330 }, { "epoch": 0.29, "learning_rate": 4.8541357323783736e-05, "loss": 1.0128, "step": 3340 }, { "epoch": 0.29, "learning_rate": 4.8536990130142376e-05, "loss": 0.9327, "step": 3350 }, { "epoch": 0.29, "learning_rate": 4.8532622936501e-05, "loss": 0.992, "step": 3360 }, { "epoch": 0.29, "learning_rate": 4.852825574285964e-05, "loss": 0.9628, "step": 3370 }, { "epoch": 0.3, "learning_rate": 4.8523888549218276e-05, "loss": 0.9177, "step": 3380 }, { "epoch": 0.3, "learning_rate": 4.851952135557691e-05, "loss": 0.9348, "step": 3390 }, { "epoch": 0.3, "learning_rate": 4.851515416193554e-05, "loss": 0.9206, "step": 3400 }, { "epoch": 0.3, "learning_rate": 4.8510786968294175e-05, "loss": 0.8575, "step": 3410 }, { "epoch": 0.3, "learning_rate": 4.850641977465281e-05, "loss": 0.9916, "step": 3420 }, { "epoch": 0.3, "learning_rate": 4.850205258101144e-05, "loss": 1.1207, "step": 3430 }, { "epoch": 0.3, "learning_rate": 4.849768538737008e-05, "loss": 0.9739, "step": 3440 }, { "epoch": 0.3, "learning_rate": 4.849331819372871e-05, "loss": 0.8795, "step": 3450 }, { "epoch": 0.3, "learning_rate": 4.848895100008735e-05, "loss": 0.863, "step": 3460 }, { "epoch": 0.3, "learning_rate": 4.848458380644598e-05, "loss": 0.9418, "step": 3470 }, { "epoch": 0.3, "learning_rate": 4.8480216612804615e-05, "loss": 0.9192, "step": 3480 }, { "epoch": 0.3, "learning_rate": 4.847584941916325e-05, "loss": 1.0152, "step": 3490 }, { "epoch": 0.31, "learning_rate": 4.847148222552188e-05, "loss": 0.9408, "step": 3500 }, { "epoch": 0.31, "learning_rate": 4.8467115031880514e-05, "loss": 0.8909, "step": 3510 }, { "epoch": 0.31, "learning_rate": 4.846274783823915e-05, "loss": 0.965, "step": 3520 }, { "epoch": 0.31, "learning_rate": 4.845838064459778e-05, "loss": 0.9394, "step": 3530 }, { "epoch": 0.31, "learning_rate": 4.845401345095642e-05, "loss": 1.0399, "step": 3540 }, { "epoch": 0.31, "learning_rate": 4.8449646257315054e-05, "loss": 0.9975, "step": 3550 }, { "epoch": 0.31, "learning_rate": 4.844527906367369e-05, "loss": 1.0759, "step": 3560 }, { "epoch": 0.31, "learning_rate": 4.844091187003232e-05, "loss": 0.8918, "step": 3570 }, { "epoch": 0.31, "learning_rate": 4.843654467639095e-05, "loss": 1.0044, "step": 3580 }, { "epoch": 0.31, "learning_rate": 4.8432177482749587e-05, "loss": 0.9593, "step": 3590 }, { "epoch": 0.31, "learning_rate": 4.842781028910822e-05, "loss": 0.8939, "step": 3600 }, { "epoch": 0.32, "learning_rate": 4.842344309546685e-05, "loss": 1.0201, "step": 3610 }, { "epoch": 0.32, "learning_rate": 4.8419075901825486e-05, "loss": 0.9759, "step": 3620 }, { "epoch": 0.32, "learning_rate": 4.8414708708184126e-05, "loss": 0.9575, "step": 3630 }, { "epoch": 0.32, "learning_rate": 4.841034151454276e-05, "loss": 0.9803, "step": 3640 }, { "epoch": 0.32, "learning_rate": 4.840597432090139e-05, "loss": 1.0374, "step": 3650 }, { "epoch": 0.32, "learning_rate": 4.8401607127260026e-05, "loss": 1.0574, "step": 3660 }, { "epoch": 0.32, "learning_rate": 4.839723993361866e-05, "loss": 0.9993, "step": 3670 }, { "epoch": 0.32, "learning_rate": 4.839287273997729e-05, "loss": 0.9252, "step": 3680 }, { "epoch": 0.32, "learning_rate": 4.8388505546335925e-05, "loss": 0.9333, "step": 3690 }, { "epoch": 0.32, "learning_rate": 4.8384138352694565e-05, "loss": 0.9202, "step": 3700 }, { "epoch": 0.32, "learning_rate": 4.837977115905319e-05, "loss": 0.9498, "step": 3710 }, { "epoch": 0.32, "learning_rate": 4.837540396541183e-05, "loss": 0.9133, "step": 3720 }, { "epoch": 0.33, "learning_rate": 4.8371036771770465e-05, "loss": 0.9092, "step": 3730 }, { "epoch": 0.33, "learning_rate": 4.83666695781291e-05, "loss": 0.9405, "step": 3740 }, { "epoch": 0.33, "learning_rate": 4.836230238448773e-05, "loss": 0.9134, "step": 3750 }, { "epoch": 0.33, "learning_rate": 4.8357935190846365e-05, "loss": 0.9513, "step": 3760 }, { "epoch": 0.33, "learning_rate": 4.8353567997205e-05, "loss": 1.0138, "step": 3770 }, { "epoch": 0.33, "learning_rate": 4.834920080356363e-05, "loss": 0.9744, "step": 3780 }, { "epoch": 0.33, "learning_rate": 4.834483360992227e-05, "loss": 0.8957, "step": 3790 }, { "epoch": 0.33, "learning_rate": 4.83404664162809e-05, "loss": 0.9095, "step": 3800 }, { "epoch": 0.33, "learning_rate": 4.833609922263954e-05, "loss": 0.9416, "step": 3810 }, { "epoch": 0.33, "learning_rate": 4.8331732028998164e-05, "loss": 0.9695, "step": 3820 }, { "epoch": 0.33, "learning_rate": 4.8327364835356804e-05, "loss": 0.9969, "step": 3830 }, { "epoch": 0.34, "learning_rate": 4.832299764171544e-05, "loss": 0.9227, "step": 3840 }, { "epoch": 0.34, "learning_rate": 4.831863044807407e-05, "loss": 0.9607, "step": 3850 }, { "epoch": 0.34, "learning_rate": 4.83142632544327e-05, "loss": 1.0871, "step": 3860 }, { "epoch": 0.34, "learning_rate": 4.8309896060791337e-05, "loss": 0.9033, "step": 3870 }, { "epoch": 0.34, "learning_rate": 4.8305528867149977e-05, "loss": 0.9323, "step": 3880 }, { "epoch": 0.34, "learning_rate": 4.83011616735086e-05, "loss": 0.9549, "step": 3890 }, { "epoch": 0.34, "learning_rate": 4.829679447986724e-05, "loss": 0.9288, "step": 3900 }, { "epoch": 0.34, "learning_rate": 4.829242728622587e-05, "loss": 0.9792, "step": 3910 }, { "epoch": 0.34, "learning_rate": 4.828806009258451e-05, "loss": 0.9596, "step": 3920 }, { "epoch": 0.34, "learning_rate": 4.828369289894314e-05, "loss": 0.9292, "step": 3930 }, { "epoch": 0.34, "learning_rate": 4.8279325705301776e-05, "loss": 0.9185, "step": 3940 }, { "epoch": 0.35, "learning_rate": 4.827495851166041e-05, "loss": 0.9718, "step": 3950 }, { "epoch": 0.35, "learning_rate": 4.827059131801904e-05, "loss": 0.9121, "step": 3960 }, { "epoch": 0.35, "learning_rate": 4.826622412437768e-05, "loss": 1.0239, "step": 3970 }, { "epoch": 0.35, "learning_rate": 4.826185693073631e-05, "loss": 0.9005, "step": 3980 }, { "epoch": 0.35, "learning_rate": 4.825748973709495e-05, "loss": 0.8409, "step": 3990 }, { "epoch": 0.35, "learning_rate": 4.8253122543453575e-05, "loss": 0.9646, "step": 4000 }, { "epoch": 0.35, "learning_rate": 4.8248755349812215e-05, "loss": 0.8558, "step": 4010 }, { "epoch": 0.35, "learning_rate": 4.824438815617084e-05, "loss": 0.9921, "step": 4020 }, { "epoch": 0.35, "learning_rate": 4.824002096252948e-05, "loss": 0.9726, "step": 4030 }, { "epoch": 0.35, "learning_rate": 4.8235653768888115e-05, "loss": 0.9622, "step": 4040 }, { "epoch": 0.35, "learning_rate": 4.823128657524675e-05, "loss": 0.8935, "step": 4050 }, { "epoch": 0.35, "learning_rate": 4.822691938160539e-05, "loss": 0.9742, "step": 4060 }, { "epoch": 0.36, "learning_rate": 4.8222552187964014e-05, "loss": 0.937, "step": 4070 }, { "epoch": 0.36, "learning_rate": 4.8218184994322654e-05, "loss": 0.938, "step": 4080 }, { "epoch": 0.36, "learning_rate": 4.821381780068128e-05, "loss": 1.0625, "step": 4090 }, { "epoch": 0.36, "learning_rate": 4.820945060703992e-05, "loss": 0.8882, "step": 4100 }, { "epoch": 0.36, "learning_rate": 4.8205083413398554e-05, "loss": 0.9611, "step": 4110 }, { "epoch": 0.36, "learning_rate": 4.820071621975719e-05, "loss": 0.9687, "step": 4120 }, { "epoch": 0.36, "learning_rate": 4.819634902611582e-05, "loss": 1.0842, "step": 4130 }, { "epoch": 0.36, "learning_rate": 4.819198183247445e-05, "loss": 0.9597, "step": 4140 }, { "epoch": 0.36, "learning_rate": 4.8187614638833087e-05, "loss": 0.9726, "step": 4150 }, { "epoch": 0.36, "learning_rate": 4.818324744519172e-05, "loss": 0.9093, "step": 4160 }, { "epoch": 0.36, "learning_rate": 4.817888025155036e-05, "loss": 0.9404, "step": 4170 }, { "epoch": 0.37, "learning_rate": 4.8174513057908986e-05, "loss": 0.9821, "step": 4180 }, { "epoch": 0.37, "learning_rate": 4.8170145864267626e-05, "loss": 0.9522, "step": 4190 }, { "epoch": 0.37, "learning_rate": 4.816577867062626e-05, "loss": 0.782, "step": 4200 }, { "epoch": 0.37, "learning_rate": 4.816141147698489e-05, "loss": 0.9857, "step": 4210 }, { "epoch": 0.37, "learning_rate": 4.8157044283343526e-05, "loss": 1.0036, "step": 4220 }, { "epoch": 0.37, "learning_rate": 4.815267708970216e-05, "loss": 0.9922, "step": 4230 }, { "epoch": 0.37, "learning_rate": 4.814830989606079e-05, "loss": 1.0202, "step": 4240 }, { "epoch": 0.37, "learning_rate": 4.8143942702419425e-05, "loss": 0.9557, "step": 4250 }, { "epoch": 0.37, "learning_rate": 4.8139575508778065e-05, "loss": 1.0429, "step": 4260 }, { "epoch": 0.37, "learning_rate": 4.813520831513669e-05, "loss": 0.9351, "step": 4270 }, { "epoch": 0.37, "learning_rate": 4.813084112149533e-05, "loss": 1.027, "step": 4280 }, { "epoch": 0.37, "learning_rate": 4.8126473927853965e-05, "loss": 0.964, "step": 4290 }, { "epoch": 0.38, "learning_rate": 4.81221067342126e-05, "loss": 1.0001, "step": 4300 }, { "epoch": 0.38, "learning_rate": 4.811773954057123e-05, "loss": 0.8441, "step": 4310 }, { "epoch": 0.38, "learning_rate": 4.8113372346929865e-05, "loss": 0.9485, "step": 4320 }, { "epoch": 0.38, "learning_rate": 4.81090051532885e-05, "loss": 0.9037, "step": 4330 }, { "epoch": 0.38, "learning_rate": 4.810463795964713e-05, "loss": 0.9977, "step": 4340 }, { "epoch": 0.38, "learning_rate": 4.8100270766005764e-05, "loss": 1.0251, "step": 4350 }, { "epoch": 0.38, "learning_rate": 4.8095903572364404e-05, "loss": 0.8914, "step": 4360 }, { "epoch": 0.38, "learning_rate": 4.809153637872304e-05, "loss": 0.9359, "step": 4370 }, { "epoch": 0.38, "learning_rate": 4.808716918508167e-05, "loss": 0.9597, "step": 4380 }, { "epoch": 0.38, "learning_rate": 4.8082801991440304e-05, "loss": 0.9289, "step": 4390 }, { "epoch": 0.38, "learning_rate": 4.807843479779894e-05, "loss": 0.9925, "step": 4400 }, { "epoch": 0.39, "learning_rate": 4.807406760415757e-05, "loss": 0.8852, "step": 4410 }, { "epoch": 0.39, "learning_rate": 4.80697004105162e-05, "loss": 1.0007, "step": 4420 }, { "epoch": 0.39, "learning_rate": 4.8065333216874837e-05, "loss": 0.9665, "step": 4430 }, { "epoch": 0.39, "learning_rate": 4.806096602323347e-05, "loss": 0.8963, "step": 4440 }, { "epoch": 0.39, "learning_rate": 4.805659882959211e-05, "loss": 0.9186, "step": 4450 }, { "epoch": 0.39, "learning_rate": 4.805223163595074e-05, "loss": 0.9967, "step": 4460 }, { "epoch": 0.39, "learning_rate": 4.8047864442309376e-05, "loss": 0.8759, "step": 4470 }, { "epoch": 0.39, "learning_rate": 4.804349724866801e-05, "loss": 0.9268, "step": 4480 }, { "epoch": 0.39, "learning_rate": 4.803913005502664e-05, "loss": 1.068, "step": 4490 }, { "epoch": 0.39, "learning_rate": 4.8034762861385276e-05, "loss": 0.8693, "step": 4500 }, { "epoch": 0.39, "learning_rate": 4.803039566774391e-05, "loss": 0.9903, "step": 4510 }, { "epoch": 0.39, "learning_rate": 4.802602847410254e-05, "loss": 0.9137, "step": 4520 }, { "epoch": 0.4, "learning_rate": 4.8021661280461175e-05, "loss": 1.0447, "step": 4530 }, { "epoch": 0.4, "learning_rate": 4.8017294086819815e-05, "loss": 0.985, "step": 4540 }, { "epoch": 0.4, "learning_rate": 4.801292689317844e-05, "loss": 0.9566, "step": 4550 }, { "epoch": 0.4, "learning_rate": 4.800855969953708e-05, "loss": 0.8894, "step": 4560 }, { "epoch": 0.4, "learning_rate": 4.8004192505895715e-05, "loss": 0.9889, "step": 4570 }, { "epoch": 0.4, "learning_rate": 4.799982531225435e-05, "loss": 0.9232, "step": 4580 }, { "epoch": 0.4, "learning_rate": 4.799545811861298e-05, "loss": 1.0342, "step": 4590 }, { "epoch": 0.4, "learning_rate": 4.7991090924971615e-05, "loss": 0.8374, "step": 4600 }, { "epoch": 0.4, "learning_rate": 4.7986723731330255e-05, "loss": 0.8928, "step": 4610 }, { "epoch": 0.4, "learning_rate": 4.798235653768888e-05, "loss": 0.8429, "step": 4620 }, { "epoch": 0.4, "learning_rate": 4.797798934404752e-05, "loss": 1.0211, "step": 4630 }, { "epoch": 0.41, "learning_rate": 4.797362215040615e-05, "loss": 1.0058, "step": 4640 }, { "epoch": 0.41, "learning_rate": 4.796925495676479e-05, "loss": 0.9645, "step": 4650 }, { "epoch": 0.41, "learning_rate": 4.796488776312342e-05, "loss": 0.956, "step": 4660 }, { "epoch": 0.41, "learning_rate": 4.7960520569482054e-05, "loss": 1.0115, "step": 4670 }, { "epoch": 0.41, "learning_rate": 4.795615337584069e-05, "loss": 1.0118, "step": 4680 }, { "epoch": 0.41, "learning_rate": 4.795178618219932e-05, "loss": 0.9815, "step": 4690 }, { "epoch": 0.41, "learning_rate": 4.794741898855796e-05, "loss": 0.8431, "step": 4700 }, { "epoch": 0.41, "learning_rate": 4.7943051794916587e-05, "loss": 0.928, "step": 4710 }, { "epoch": 0.41, "learning_rate": 4.7938684601275227e-05, "loss": 0.8808, "step": 4720 }, { "epoch": 0.41, "learning_rate": 4.793431740763385e-05, "loss": 0.9732, "step": 4730 }, { "epoch": 0.41, "learning_rate": 4.792995021399249e-05, "loss": 0.8384, "step": 4740 }, { "epoch": 0.41, "learning_rate": 4.792558302035112e-05, "loss": 1.0879, "step": 4750 }, { "epoch": 0.42, "learning_rate": 4.792121582670976e-05, "loss": 0.8439, "step": 4760 }, { "epoch": 0.42, "learning_rate": 4.791684863306839e-05, "loss": 0.9156, "step": 4770 }, { "epoch": 0.42, "learning_rate": 4.7912481439427026e-05, "loss": 0.833, "step": 4780 }, { "epoch": 0.42, "learning_rate": 4.7908114245785666e-05, "loss": 0.9546, "step": 4790 }, { "epoch": 0.42, "learning_rate": 4.790374705214429e-05, "loss": 0.8693, "step": 4800 }, { "epoch": 0.42, "learning_rate": 4.789937985850293e-05, "loss": 1.1034, "step": 4810 }, { "epoch": 0.42, "learning_rate": 4.789501266486156e-05, "loss": 1.0163, "step": 4820 }, { "epoch": 0.42, "learning_rate": 4.78906454712202e-05, "loss": 0.9428, "step": 4830 }, { "epoch": 0.42, "learning_rate": 4.7886278277578825e-05, "loss": 0.9196, "step": 4840 }, { "epoch": 0.42, "learning_rate": 4.7881911083937465e-05, "loss": 0.8901, "step": 4850 }, { "epoch": 0.42, "learning_rate": 4.78775438902961e-05, "loss": 0.9336, "step": 4860 }, { "epoch": 0.43, "learning_rate": 4.787317669665473e-05, "loss": 0.9136, "step": 4870 }, { "epoch": 0.43, "learning_rate": 4.7868809503013365e-05, "loss": 1.0059, "step": 4880 }, { "epoch": 0.43, "learning_rate": 4.7864442309372e-05, "loss": 0.8842, "step": 4890 }, { "epoch": 0.43, "learning_rate": 4.786007511573064e-05, "loss": 0.8995, "step": 4900 }, { "epoch": 0.43, "learning_rate": 4.7855707922089264e-05, "loss": 0.888, "step": 4910 }, { "epoch": 0.43, "learning_rate": 4.7851340728447904e-05, "loss": 0.9411, "step": 4920 }, { "epoch": 0.43, "learning_rate": 4.784697353480653e-05, "loss": 1.0011, "step": 4930 }, { "epoch": 0.43, "learning_rate": 4.784260634116517e-05, "loss": 0.9201, "step": 4940 }, { "epoch": 0.43, "learning_rate": 4.7838239147523804e-05, "loss": 0.9689, "step": 4950 }, { "epoch": 0.43, "learning_rate": 4.783387195388244e-05, "loss": 0.9215, "step": 4960 }, { "epoch": 0.43, "learning_rate": 4.782950476024107e-05, "loss": 0.8547, "step": 4970 }, { "epoch": 0.43, "learning_rate": 4.7825137566599703e-05, "loss": 0.9999, "step": 4980 }, { "epoch": 0.44, "learning_rate": 4.782077037295834e-05, "loss": 0.8561, "step": 4990 }, { "epoch": 0.44, "learning_rate": 4.781640317931697e-05, "loss": 0.915, "step": 5000 }, { "epoch": 0.44, "eval_accuracy": 0.5413186597229046, "eval_loss": 0.9457740783691406, "eval_runtime": 84.182, "eval_samples_per_second": 120.893, "eval_steps_per_second": 15.122, "step": 5000 }, { "epoch": 0.44, "learning_rate": 4.781203598567561e-05, "loss": 1.0026, "step": 5010 }, { "epoch": 0.44, "learning_rate": 4.780766879203424e-05, "loss": 1.1102, "step": 5020 }, { "epoch": 0.44, "learning_rate": 4.7803301598392876e-05, "loss": 0.9471, "step": 5030 }, { "epoch": 0.44, "learning_rate": 4.779893440475151e-05, "loss": 1.0117, "step": 5040 }, { "epoch": 0.44, "learning_rate": 4.779456721111014e-05, "loss": 0.9408, "step": 5050 }, { "epoch": 0.44, "learning_rate": 4.7790200017468776e-05, "loss": 0.9945, "step": 5060 }, { "epoch": 0.44, "learning_rate": 4.778583282382741e-05, "loss": 0.9939, "step": 5070 }, { "epoch": 0.44, "learning_rate": 4.778146563018604e-05, "loss": 0.9611, "step": 5080 }, { "epoch": 0.44, "learning_rate": 4.7777098436544675e-05, "loss": 0.9477, "step": 5090 }, { "epoch": 0.45, "learning_rate": 4.7772731242903315e-05, "loss": 0.9704, "step": 5100 }, { "epoch": 0.45, "learning_rate": 4.776836404926195e-05, "loss": 1.046, "step": 5110 }, { "epoch": 0.45, "learning_rate": 4.776399685562058e-05, "loss": 0.9343, "step": 5120 }, { "epoch": 0.45, "learning_rate": 4.7759629661979215e-05, "loss": 0.8697, "step": 5130 }, { "epoch": 0.45, "learning_rate": 4.775526246833785e-05, "loss": 0.9833, "step": 5140 }, { "epoch": 0.45, "learning_rate": 4.775089527469648e-05, "loss": 1.0198, "step": 5150 }, { "epoch": 0.45, "learning_rate": 4.7746528081055115e-05, "loss": 0.9693, "step": 5160 }, { "epoch": 0.45, "learning_rate": 4.774216088741375e-05, "loss": 0.8275, "step": 5170 }, { "epoch": 0.45, "learning_rate": 4.773779369377239e-05, "loss": 0.9569, "step": 5180 }, { "epoch": 0.45, "learning_rate": 4.773342650013102e-05, "loss": 0.907, "step": 5190 }, { "epoch": 0.45, "learning_rate": 4.7729059306489654e-05, "loss": 0.9482, "step": 5200 }, { "epoch": 0.46, "learning_rate": 4.772469211284829e-05, "loss": 0.9195, "step": 5210 }, { "epoch": 0.46, "learning_rate": 4.772032491920692e-05, "loss": 0.7862, "step": 5220 }, { "epoch": 0.46, "learning_rate": 4.7715957725565554e-05, "loss": 0.9334, "step": 5230 }, { "epoch": 0.46, "learning_rate": 4.771159053192419e-05, "loss": 0.8626, "step": 5240 }, { "epoch": 0.46, "learning_rate": 4.770722333828282e-05, "loss": 0.9073, "step": 5250 }, { "epoch": 0.46, "learning_rate": 4.7702856144641453e-05, "loss": 1.0073, "step": 5260 }, { "epoch": 0.46, "learning_rate": 4.769848895100009e-05, "loss": 1.1421, "step": 5270 }, { "epoch": 0.46, "learning_rate": 4.769412175735872e-05, "loss": 0.8536, "step": 5280 }, { "epoch": 0.46, "learning_rate": 4.768975456371736e-05, "loss": 1.0058, "step": 5290 }, { "epoch": 0.46, "learning_rate": 4.768538737007599e-05, "loss": 0.9426, "step": 5300 }, { "epoch": 0.46, "learning_rate": 4.7681020176434626e-05, "loss": 0.9221, "step": 5310 }, { "epoch": 0.46, "learning_rate": 4.767665298279326e-05, "loss": 0.9836, "step": 5320 }, { "epoch": 0.47, "learning_rate": 4.767228578915189e-05, "loss": 0.9412, "step": 5330 }, { "epoch": 0.47, "learning_rate": 4.7667918595510526e-05, "loss": 0.9653, "step": 5340 }, { "epoch": 0.47, "learning_rate": 4.766355140186916e-05, "loss": 0.9413, "step": 5350 }, { "epoch": 0.47, "learning_rate": 4.76591842082278e-05, "loss": 0.8595, "step": 5360 }, { "epoch": 0.47, "learning_rate": 4.7654817014586425e-05, "loss": 0.9277, "step": 5370 }, { "epoch": 0.47, "learning_rate": 4.7650449820945065e-05, "loss": 0.9825, "step": 5380 }, { "epoch": 0.47, "learning_rate": 4.76460826273037e-05, "loss": 0.9038, "step": 5390 }, { "epoch": 0.47, "learning_rate": 4.764171543366233e-05, "loss": 0.9425, "step": 5400 }, { "epoch": 0.47, "learning_rate": 4.7637348240020965e-05, "loss": 1.0464, "step": 5410 }, { "epoch": 0.47, "learning_rate": 4.76329810463796e-05, "loss": 0.9711, "step": 5420 }, { "epoch": 0.47, "learning_rate": 4.762861385273824e-05, "loss": 0.9361, "step": 5430 }, { "epoch": 0.48, "learning_rate": 4.7624246659096865e-05, "loss": 1.003, "step": 5440 }, { "epoch": 0.48, "learning_rate": 4.7619879465455505e-05, "loss": 0.9661, "step": 5450 }, { "epoch": 0.48, "learning_rate": 4.761551227181413e-05, "loss": 0.9532, "step": 5460 }, { "epoch": 0.48, "learning_rate": 4.761114507817277e-05, "loss": 0.9735, "step": 5470 }, { "epoch": 0.48, "learning_rate": 4.76067778845314e-05, "loss": 0.9799, "step": 5480 }, { "epoch": 0.48, "learning_rate": 4.760241069089004e-05, "loss": 0.8876, "step": 5490 }, { "epoch": 0.48, "learning_rate": 4.759804349724867e-05, "loss": 0.9545, "step": 5500 }, { "epoch": 0.48, "learning_rate": 4.7593676303607304e-05, "loss": 0.9032, "step": 5510 }, { "epoch": 0.48, "learning_rate": 4.7589309109965944e-05, "loss": 0.9161, "step": 5520 }, { "epoch": 0.48, "learning_rate": 4.758494191632457e-05, "loss": 0.9136, "step": 5530 }, { "epoch": 0.48, "learning_rate": 4.758057472268321e-05, "loss": 0.935, "step": 5540 }, { "epoch": 0.48, "learning_rate": 4.757620752904184e-05, "loss": 0.8862, "step": 5550 }, { "epoch": 0.49, "learning_rate": 4.7571840335400477e-05, "loss": 0.9883, "step": 5560 }, { "epoch": 0.49, "learning_rate": 4.75674731417591e-05, "loss": 0.9101, "step": 5570 }, { "epoch": 0.49, "learning_rate": 4.756310594811774e-05, "loss": 0.9613, "step": 5580 }, { "epoch": 0.49, "learning_rate": 4.7558738754476376e-05, "loss": 0.9583, "step": 5590 }, { "epoch": 0.49, "learning_rate": 4.755437156083501e-05, "loss": 1.0256, "step": 5600 }, { "epoch": 0.49, "learning_rate": 4.755000436719364e-05, "loss": 0.8804, "step": 5610 }, { "epoch": 0.49, "learning_rate": 4.7545637173552276e-05, "loss": 1.0011, "step": 5620 }, { "epoch": 0.49, "learning_rate": 4.7541269979910916e-05, "loss": 0.9145, "step": 5630 }, { "epoch": 0.49, "learning_rate": 4.753690278626954e-05, "loss": 1.0676, "step": 5640 }, { "epoch": 0.49, "learning_rate": 4.753253559262818e-05, "loss": 1.015, "step": 5650 }, { "epoch": 0.49, "learning_rate": 4.752816839898681e-05, "loss": 1.0333, "step": 5660 }, { "epoch": 0.5, "learning_rate": 4.752380120534545e-05, "loss": 0.9977, "step": 5670 }, { "epoch": 0.5, "learning_rate": 4.751943401170408e-05, "loss": 0.9501, "step": 5680 }, { "epoch": 0.5, "learning_rate": 4.7515066818062715e-05, "loss": 0.8981, "step": 5690 }, { "epoch": 0.5, "learning_rate": 4.751069962442135e-05, "loss": 0.8934, "step": 5700 }, { "epoch": 0.5, "learning_rate": 4.750633243077998e-05, "loss": 0.9149, "step": 5710 }, { "epoch": 0.5, "learning_rate": 4.750196523713862e-05, "loss": 0.9701, "step": 5720 }, { "epoch": 0.5, "learning_rate": 4.749759804349725e-05, "loss": 0.7643, "step": 5730 }, { "epoch": 0.5, "learning_rate": 4.749323084985589e-05, "loss": 0.9105, "step": 5740 }, { "epoch": 0.5, "learning_rate": 4.7488863656214514e-05, "loss": 1.0524, "step": 5750 }, { "epoch": 0.5, "learning_rate": 4.7484496462573154e-05, "loss": 1.0357, "step": 5760 }, { "epoch": 0.5, "learning_rate": 4.748012926893179e-05, "loss": 0.8244, "step": 5770 }, { "epoch": 0.5, "learning_rate": 4.747576207529042e-05, "loss": 0.9587, "step": 5780 }, { "epoch": 0.51, "learning_rate": 4.7471394881649054e-05, "loss": 1.042, "step": 5790 }, { "epoch": 0.51, "learning_rate": 4.746702768800769e-05, "loss": 0.8206, "step": 5800 }, { "epoch": 0.51, "learning_rate": 4.746266049436632e-05, "loss": 0.9515, "step": 5810 }, { "epoch": 0.51, "learning_rate": 4.7458293300724953e-05, "loss": 0.9164, "step": 5820 }, { "epoch": 0.51, "learning_rate": 4.7453926107083593e-05, "loss": 0.831, "step": 5830 }, { "epoch": 0.51, "learning_rate": 4.744955891344223e-05, "loss": 0.9254, "step": 5840 }, { "epoch": 0.51, "learning_rate": 4.744519171980086e-05, "loss": 0.9678, "step": 5850 }, { "epoch": 0.51, "learning_rate": 4.744082452615949e-05, "loss": 0.9928, "step": 5860 }, { "epoch": 0.51, "learning_rate": 4.7436457332518126e-05, "loss": 0.9701, "step": 5870 }, { "epoch": 0.51, "learning_rate": 4.743209013887676e-05, "loss": 1.0539, "step": 5880 }, { "epoch": 0.51, "learning_rate": 4.742772294523539e-05, "loss": 0.9236, "step": 5890 }, { "epoch": 0.52, "learning_rate": 4.7423355751594026e-05, "loss": 0.958, "step": 5900 }, { "epoch": 0.52, "learning_rate": 4.741898855795266e-05, "loss": 1.0044, "step": 5910 }, { "epoch": 0.52, "learning_rate": 4.74146213643113e-05, "loss": 1.0415, "step": 5920 }, { "epoch": 0.52, "learning_rate": 4.741025417066993e-05, "loss": 0.914, "step": 5930 }, { "epoch": 0.52, "learning_rate": 4.7405886977028565e-05, "loss": 0.935, "step": 5940 }, { "epoch": 0.52, "learning_rate": 4.74015197833872e-05, "loss": 0.9326, "step": 5950 }, { "epoch": 0.52, "learning_rate": 4.739715258974583e-05, "loss": 0.8993, "step": 5960 }, { "epoch": 0.52, "learning_rate": 4.7392785396104465e-05, "loss": 0.88, "step": 5970 }, { "epoch": 0.52, "learning_rate": 4.73884182024631e-05, "loss": 0.9679, "step": 5980 }, { "epoch": 0.52, "learning_rate": 4.738405100882173e-05, "loss": 1.1006, "step": 5990 }, { "epoch": 0.52, "learning_rate": 4.737968381518037e-05, "loss": 0.9666, "step": 6000 }, { "epoch": 0.52, "learning_rate": 4.7375316621539e-05, "loss": 1.0173, "step": 6010 }, { "epoch": 0.53, "learning_rate": 4.737094942789764e-05, "loss": 0.9477, "step": 6020 }, { "epoch": 0.53, "learning_rate": 4.736658223425627e-05, "loss": 0.8937, "step": 6030 }, { "epoch": 0.53, "learning_rate": 4.7362215040614904e-05, "loss": 0.9316, "step": 6040 }, { "epoch": 0.53, "learning_rate": 4.735784784697354e-05, "loss": 1.0215, "step": 6050 }, { "epoch": 0.53, "learning_rate": 4.735348065333217e-05, "loss": 0.9685, "step": 6060 }, { "epoch": 0.53, "learning_rate": 4.7349113459690804e-05, "loss": 0.8727, "step": 6070 }, { "epoch": 0.53, "learning_rate": 4.734474626604944e-05, "loss": 0.9117, "step": 6080 }, { "epoch": 0.53, "learning_rate": 4.734037907240808e-05, "loss": 1.0533, "step": 6090 }, { "epoch": 0.53, "learning_rate": 4.7336011878766703e-05, "loss": 1.0263, "step": 6100 }, { "epoch": 0.53, "learning_rate": 4.7331644685125343e-05, "loss": 0.9017, "step": 6110 }, { "epoch": 0.53, "learning_rate": 4.732727749148398e-05, "loss": 0.9113, "step": 6120 }, { "epoch": 0.54, "learning_rate": 4.732291029784261e-05, "loss": 0.8363, "step": 6130 }, { "epoch": 0.54, "learning_rate": 4.731854310420124e-05, "loss": 0.9279, "step": 6140 }, { "epoch": 0.54, "learning_rate": 4.7314175910559876e-05, "loss": 0.9293, "step": 6150 }, { "epoch": 0.54, "learning_rate": 4.730980871691851e-05, "loss": 0.884, "step": 6160 }, { "epoch": 0.54, "learning_rate": 4.730544152327714e-05, "loss": 0.9532, "step": 6170 }, { "epoch": 0.54, "learning_rate": 4.730107432963578e-05, "loss": 0.8419, "step": 6180 }, { "epoch": 0.54, "learning_rate": 4.729670713599441e-05, "loss": 0.8287, "step": 6190 }, { "epoch": 0.54, "learning_rate": 4.729233994235305e-05, "loss": 0.826, "step": 6200 }, { "epoch": 0.54, "learning_rate": 4.7287972748711675e-05, "loss": 0.9862, "step": 6210 }, { "epoch": 0.54, "learning_rate": 4.7283605555070315e-05, "loss": 0.8946, "step": 6220 }, { "epoch": 0.54, "learning_rate": 4.727923836142895e-05, "loss": 1.0277, "step": 6230 }, { "epoch": 0.55, "learning_rate": 4.727487116778758e-05, "loss": 0.885, "step": 6240 }, { "epoch": 0.55, "learning_rate": 4.727050397414622e-05, "loss": 0.9274, "step": 6250 }, { "epoch": 0.55, "learning_rate": 4.726613678050485e-05, "loss": 0.9631, "step": 6260 }, { "epoch": 0.55, "learning_rate": 4.726176958686349e-05, "loss": 0.9603, "step": 6270 }, { "epoch": 0.55, "learning_rate": 4.7257402393222115e-05, "loss": 0.8425, "step": 6280 }, { "epoch": 0.55, "learning_rate": 4.7253035199580755e-05, "loss": 0.8951, "step": 6290 }, { "epoch": 0.55, "learning_rate": 4.724866800593938e-05, "loss": 0.8985, "step": 6300 }, { "epoch": 0.55, "learning_rate": 4.724430081229802e-05, "loss": 1.0314, "step": 6310 }, { "epoch": 0.55, "learning_rate": 4.7239933618656654e-05, "loss": 0.968, "step": 6320 }, { "epoch": 0.55, "learning_rate": 4.723556642501529e-05, "loss": 0.9126, "step": 6330 }, { "epoch": 0.55, "learning_rate": 4.723119923137392e-05, "loss": 0.9209, "step": 6340 }, { "epoch": 0.55, "learning_rate": 4.7226832037732554e-05, "loss": 0.9288, "step": 6350 }, { "epoch": 0.56, "learning_rate": 4.7222464844091194e-05, "loss": 0.9363, "step": 6360 }, { "epoch": 0.56, "learning_rate": 4.721809765044982e-05, "loss": 0.9826, "step": 6370 }, { "epoch": 0.56, "learning_rate": 4.721373045680846e-05, "loss": 1.0211, "step": 6380 }, { "epoch": 0.56, "learning_rate": 4.720936326316709e-05, "loss": 1.0569, "step": 6390 }, { "epoch": 0.56, "learning_rate": 4.720499606952573e-05, "loss": 0.9363, "step": 6400 }, { "epoch": 0.56, "learning_rate": 4.720062887588435e-05, "loss": 0.8699, "step": 6410 }, { "epoch": 0.56, "learning_rate": 4.719626168224299e-05, "loss": 1.0077, "step": 6420 }, { "epoch": 0.56, "learning_rate": 4.7191894488601626e-05, "loss": 1.0768, "step": 6430 }, { "epoch": 0.56, "learning_rate": 4.718752729496026e-05, "loss": 0.9443, "step": 6440 }, { "epoch": 0.56, "learning_rate": 4.71831601013189e-05, "loss": 0.8926, "step": 6450 }, { "epoch": 0.56, "learning_rate": 4.7178792907677526e-05, "loss": 0.885, "step": 6460 }, { "epoch": 0.57, "learning_rate": 4.7174425714036166e-05, "loss": 1.0487, "step": 6470 }, { "epoch": 0.57, "learning_rate": 4.717005852039479e-05, "loss": 1.0416, "step": 6480 }, { "epoch": 0.57, "learning_rate": 4.716569132675343e-05, "loss": 0.9057, "step": 6490 }, { "epoch": 0.57, "learning_rate": 4.7161324133112065e-05, "loss": 0.9122, "step": 6500 }, { "epoch": 0.57, "learning_rate": 4.71569569394707e-05, "loss": 0.8959, "step": 6510 }, { "epoch": 0.57, "learning_rate": 4.715258974582933e-05, "loss": 0.9956, "step": 6520 }, { "epoch": 0.57, "learning_rate": 4.7148222552187965e-05, "loss": 0.9792, "step": 6530 }, { "epoch": 0.57, "learning_rate": 4.71438553585466e-05, "loss": 0.8862, "step": 6540 }, { "epoch": 0.57, "learning_rate": 4.713948816490523e-05, "loss": 0.9126, "step": 6550 }, { "epoch": 0.57, "learning_rate": 4.713512097126387e-05, "loss": 0.9894, "step": 6560 }, { "epoch": 0.57, "learning_rate": 4.71307537776225e-05, "loss": 0.938, "step": 6570 }, { "epoch": 0.57, "learning_rate": 4.712638658398114e-05, "loss": 0.9393, "step": 6580 }, { "epoch": 0.58, "learning_rate": 4.712201939033977e-05, "loss": 0.9629, "step": 6590 }, { "epoch": 0.58, "learning_rate": 4.7117652196698404e-05, "loss": 0.873, "step": 6600 }, { "epoch": 0.58, "learning_rate": 4.711328500305704e-05, "loss": 0.8973, "step": 6610 }, { "epoch": 0.58, "learning_rate": 4.710891780941567e-05, "loss": 0.9372, "step": 6620 }, { "epoch": 0.58, "learning_rate": 4.7104550615774304e-05, "loss": 0.9139, "step": 6630 }, { "epoch": 0.58, "learning_rate": 4.710018342213294e-05, "loss": 0.9415, "step": 6640 }, { "epoch": 0.58, "learning_rate": 4.709581622849158e-05, "loss": 0.9936, "step": 6650 }, { "epoch": 0.58, "learning_rate": 4.709144903485021e-05, "loss": 0.9515, "step": 6660 }, { "epoch": 0.58, "learning_rate": 4.7087081841208843e-05, "loss": 0.9849, "step": 6670 }, { "epoch": 0.58, "learning_rate": 4.708271464756748e-05, "loss": 1.0061, "step": 6680 }, { "epoch": 0.58, "learning_rate": 4.707834745392611e-05, "loss": 0.9298, "step": 6690 }, { "epoch": 0.59, "learning_rate": 4.707398026028474e-05, "loss": 0.9501, "step": 6700 }, { "epoch": 0.59, "learning_rate": 4.7069613066643376e-05, "loss": 0.9054, "step": 6710 }, { "epoch": 0.59, "learning_rate": 4.706524587300201e-05, "loss": 0.9472, "step": 6720 }, { "epoch": 0.59, "learning_rate": 4.706087867936064e-05, "loss": 0.924, "step": 6730 }, { "epoch": 0.59, "learning_rate": 4.7056511485719276e-05, "loss": 0.935, "step": 6740 }, { "epoch": 0.59, "learning_rate": 4.7052144292077916e-05, "loss": 0.9046, "step": 6750 }, { "epoch": 0.59, "learning_rate": 4.704777709843655e-05, "loss": 0.9272, "step": 6760 }, { "epoch": 0.59, "learning_rate": 4.704340990479518e-05, "loss": 0.9817, "step": 6770 }, { "epoch": 0.59, "learning_rate": 4.7039042711153815e-05, "loss": 0.919, "step": 6780 }, { "epoch": 0.59, "learning_rate": 4.703467551751245e-05, "loss": 0.8867, "step": 6790 }, { "epoch": 0.59, "learning_rate": 4.703030832387108e-05, "loss": 1.0659, "step": 6800 }, { "epoch": 0.59, "learning_rate": 4.7025941130229715e-05, "loss": 1.0513, "step": 6810 }, { "epoch": 0.6, "learning_rate": 4.702157393658835e-05, "loss": 0.9983, "step": 6820 }, { "epoch": 0.6, "learning_rate": 4.701720674294698e-05, "loss": 0.889, "step": 6830 }, { "epoch": 0.6, "learning_rate": 4.701283954930562e-05, "loss": 1.0621, "step": 6840 }, { "epoch": 0.6, "learning_rate": 4.7008472355664255e-05, "loss": 1.0154, "step": 6850 }, { "epoch": 0.6, "learning_rate": 4.700410516202289e-05, "loss": 0.9303, "step": 6860 }, { "epoch": 0.6, "learning_rate": 4.699973796838152e-05, "loss": 0.9811, "step": 6870 }, { "epoch": 0.6, "learning_rate": 4.6995370774740154e-05, "loss": 0.9383, "step": 6880 }, { "epoch": 0.6, "learning_rate": 4.699100358109879e-05, "loss": 0.8855, "step": 6890 }, { "epoch": 0.6, "learning_rate": 4.698663638745742e-05, "loss": 0.9079, "step": 6900 }, { "epoch": 0.6, "learning_rate": 4.698226919381606e-05, "loss": 0.9674, "step": 6910 }, { "epoch": 0.6, "learning_rate": 4.697790200017469e-05, "loss": 0.9785, "step": 6920 }, { "epoch": 0.61, "learning_rate": 4.697353480653333e-05, "loss": 1.0726, "step": 6930 }, { "epoch": 0.61, "learning_rate": 4.6969167612891953e-05, "loss": 0.9426, "step": 6940 }, { "epoch": 0.61, "learning_rate": 4.6964800419250593e-05, "loss": 0.9499, "step": 6950 }, { "epoch": 0.61, "learning_rate": 4.696043322560923e-05, "loss": 1.0167, "step": 6960 }, { "epoch": 0.61, "learning_rate": 4.695606603196786e-05, "loss": 0.9911, "step": 6970 }, { "epoch": 0.61, "learning_rate": 4.695169883832649e-05, "loss": 0.8555, "step": 6980 }, { "epoch": 0.61, "learning_rate": 4.6947331644685126e-05, "loss": 0.9792, "step": 6990 }, { "epoch": 0.61, "learning_rate": 4.6942964451043766e-05, "loss": 0.8869, "step": 7000 }, { "epoch": 0.61, "learning_rate": 4.693859725740239e-05, "loss": 0.9446, "step": 7010 }, { "epoch": 0.61, "learning_rate": 4.693423006376103e-05, "loss": 0.9355, "step": 7020 }, { "epoch": 0.61, "learning_rate": 4.692986287011966e-05, "loss": 0.9304, "step": 7030 }, { "epoch": 0.61, "learning_rate": 4.69254956764783e-05, "loss": 0.8644, "step": 7040 }, { "epoch": 0.62, "learning_rate": 4.692112848283693e-05, "loss": 0.9132, "step": 7050 }, { "epoch": 0.62, "learning_rate": 4.6916761289195565e-05, "loss": 0.9018, "step": 7060 }, { "epoch": 0.62, "learning_rate": 4.69123940955542e-05, "loss": 0.9422, "step": 7070 }, { "epoch": 0.62, "learning_rate": 4.690802690191283e-05, "loss": 1.0124, "step": 7080 }, { "epoch": 0.62, "learning_rate": 4.690365970827147e-05, "loss": 0.9288, "step": 7090 }, { "epoch": 0.62, "learning_rate": 4.68992925146301e-05, "loss": 0.944, "step": 7100 }, { "epoch": 0.62, "learning_rate": 4.689492532098874e-05, "loss": 0.9644, "step": 7110 }, { "epoch": 0.62, "learning_rate": 4.6890558127347365e-05, "loss": 0.9294, "step": 7120 }, { "epoch": 0.62, "learning_rate": 4.6886190933706005e-05, "loss": 0.8939, "step": 7130 }, { "epoch": 0.62, "learning_rate": 4.688182374006463e-05, "loss": 0.8937, "step": 7140 }, { "epoch": 0.62, "learning_rate": 4.687745654642327e-05, "loss": 0.9284, "step": 7150 }, { "epoch": 0.63, "learning_rate": 4.6873089352781904e-05, "loss": 0.9391, "step": 7160 }, { "epoch": 0.63, "learning_rate": 4.686872215914054e-05, "loss": 1.059, "step": 7170 }, { "epoch": 0.63, "learning_rate": 4.686435496549918e-05, "loss": 0.9478, "step": 7180 }, { "epoch": 0.63, "learning_rate": 4.6859987771857804e-05, "loss": 0.9316, "step": 7190 }, { "epoch": 0.63, "learning_rate": 4.6855620578216444e-05, "loss": 0.8847, "step": 7200 }, { "epoch": 0.63, "learning_rate": 4.685125338457507e-05, "loss": 0.8741, "step": 7210 }, { "epoch": 0.63, "learning_rate": 4.684688619093371e-05, "loss": 0.9435, "step": 7220 }, { "epoch": 0.63, "learning_rate": 4.684251899729234e-05, "loss": 0.9314, "step": 7230 }, { "epoch": 0.63, "learning_rate": 4.683815180365098e-05, "loss": 0.9677, "step": 7240 }, { "epoch": 0.63, "learning_rate": 4.683378461000961e-05, "loss": 0.9047, "step": 7250 }, { "epoch": 0.63, "learning_rate": 4.682941741636824e-05, "loss": 0.89, "step": 7260 }, { "epoch": 0.63, "learning_rate": 4.6825050222726876e-05, "loss": 0.8205, "step": 7270 }, { "epoch": 0.64, "learning_rate": 4.682068302908551e-05, "loss": 1.0917, "step": 7280 }, { "epoch": 0.64, "learning_rate": 4.681631583544415e-05, "loss": 0.9073, "step": 7290 }, { "epoch": 0.64, "learning_rate": 4.6811948641802776e-05, "loss": 1.0848, "step": 7300 }, { "epoch": 0.64, "learning_rate": 4.6807581448161416e-05, "loss": 0.9949, "step": 7310 }, { "epoch": 0.64, "learning_rate": 4.680321425452005e-05, "loss": 0.9402, "step": 7320 }, { "epoch": 0.64, "learning_rate": 4.679884706087868e-05, "loss": 0.8855, "step": 7330 }, { "epoch": 0.64, "learning_rate": 4.6794479867237315e-05, "loss": 1.0907, "step": 7340 }, { "epoch": 0.64, "learning_rate": 4.679011267359595e-05, "loss": 1.0442, "step": 7350 }, { "epoch": 0.64, "learning_rate": 4.678574547995458e-05, "loss": 0.9652, "step": 7360 }, { "epoch": 0.64, "learning_rate": 4.6781378286313215e-05, "loss": 0.9912, "step": 7370 }, { "epoch": 0.64, "learning_rate": 4.6777011092671855e-05, "loss": 0.9111, "step": 7380 }, { "epoch": 0.65, "learning_rate": 4.677264389903048e-05, "loss": 0.9201, "step": 7390 }, { "epoch": 0.65, "learning_rate": 4.676827670538912e-05, "loss": 0.838, "step": 7400 }, { "epoch": 0.65, "learning_rate": 4.6763909511747755e-05, "loss": 0.8992, "step": 7410 }, { "epoch": 0.65, "learning_rate": 4.675954231810639e-05, "loss": 0.9815, "step": 7420 }, { "epoch": 0.65, "learning_rate": 4.675517512446502e-05, "loss": 1.0163, "step": 7430 }, { "epoch": 0.65, "learning_rate": 4.6750807930823654e-05, "loss": 0.9521, "step": 7440 }, { "epoch": 0.65, "learning_rate": 4.674644073718229e-05, "loss": 0.9802, "step": 7450 }, { "epoch": 0.65, "learning_rate": 4.674207354354092e-05, "loss": 0.9717, "step": 7460 }, { "epoch": 0.65, "learning_rate": 4.6737706349899554e-05, "loss": 0.849, "step": 7470 }, { "epoch": 0.65, "learning_rate": 4.6733339156258194e-05, "loss": 0.8842, "step": 7480 }, { "epoch": 0.65, "learning_rate": 4.672897196261683e-05, "loss": 0.9432, "step": 7490 }, { "epoch": 0.66, "learning_rate": 4.672460476897546e-05, "loss": 0.9463, "step": 7500 }, { "epoch": 0.66, "learning_rate": 4.6720237575334093e-05, "loss": 0.9011, "step": 7510 }, { "epoch": 0.66, "learning_rate": 4.671587038169273e-05, "loss": 0.9069, "step": 7520 }, { "epoch": 0.66, "learning_rate": 4.671150318805136e-05, "loss": 0.8761, "step": 7530 }, { "epoch": 0.66, "learning_rate": 4.670713599440999e-05, "loss": 0.8911, "step": 7540 }, { "epoch": 0.66, "learning_rate": 4.6702768800768626e-05, "loss": 0.891, "step": 7550 }, { "epoch": 0.66, "learning_rate": 4.669840160712726e-05, "loss": 1.0597, "step": 7560 }, { "epoch": 0.66, "learning_rate": 4.66940344134859e-05, "loss": 0.9485, "step": 7570 }, { "epoch": 0.66, "learning_rate": 4.668966721984453e-05, "loss": 0.9077, "step": 7580 }, { "epoch": 0.66, "learning_rate": 4.6685300026203166e-05, "loss": 1.0007, "step": 7590 }, { "epoch": 0.66, "learning_rate": 4.66809328325618e-05, "loss": 0.9607, "step": 7600 }, { "epoch": 0.66, "learning_rate": 4.667656563892043e-05, "loss": 0.879, "step": 7610 }, { "epoch": 0.67, "learning_rate": 4.6672198445279065e-05, "loss": 0.9697, "step": 7620 }, { "epoch": 0.67, "learning_rate": 4.66678312516377e-05, "loss": 0.9778, "step": 7630 }, { "epoch": 0.67, "learning_rate": 4.666346405799633e-05, "loss": 0.9063, "step": 7640 }, { "epoch": 0.67, "learning_rate": 4.6659096864354965e-05, "loss": 0.9382, "step": 7650 }, { "epoch": 0.67, "learning_rate": 4.6654729670713605e-05, "loss": 0.8835, "step": 7660 }, { "epoch": 0.67, "learning_rate": 4.665036247707223e-05, "loss": 0.9242, "step": 7670 }, { "epoch": 0.67, "learning_rate": 4.664599528343087e-05, "loss": 0.9049, "step": 7680 }, { "epoch": 0.67, "learning_rate": 4.6641628089789505e-05, "loss": 0.9353, "step": 7690 }, { "epoch": 0.67, "learning_rate": 4.663726089614814e-05, "loss": 0.9879, "step": 7700 }, { "epoch": 0.67, "learning_rate": 4.663289370250677e-05, "loss": 0.9736, "step": 7710 }, { "epoch": 0.67, "learning_rate": 4.6628526508865404e-05, "loss": 0.9421, "step": 7720 }, { "epoch": 0.68, "learning_rate": 4.6624159315224044e-05, "loss": 0.951, "step": 7730 }, { "epoch": 0.68, "learning_rate": 4.661979212158267e-05, "loss": 1.0041, "step": 7740 }, { "epoch": 0.68, "learning_rate": 4.661542492794131e-05, "loss": 0.9923, "step": 7750 }, { "epoch": 0.68, "learning_rate": 4.661105773429994e-05, "loss": 1.0187, "step": 7760 }, { "epoch": 0.68, "learning_rate": 4.660669054065858e-05, "loss": 0.9704, "step": 7770 }, { "epoch": 0.68, "learning_rate": 4.660232334701721e-05, "loss": 0.9147, "step": 7780 }, { "epoch": 0.68, "learning_rate": 4.6597956153375843e-05, "loss": 0.9098, "step": 7790 }, { "epoch": 0.68, "learning_rate": 4.659358895973448e-05, "loss": 0.8603, "step": 7800 }, { "epoch": 0.68, "learning_rate": 4.658922176609311e-05, "loss": 0.9084, "step": 7810 }, { "epoch": 0.68, "learning_rate": 4.658485457245175e-05, "loss": 0.943, "step": 7820 }, { "epoch": 0.68, "learning_rate": 4.6580487378810376e-05, "loss": 1.0826, "step": 7830 }, { "epoch": 0.68, "learning_rate": 4.6576120185169016e-05, "loss": 0.8726, "step": 7840 }, { "epoch": 0.69, "learning_rate": 4.657175299152764e-05, "loss": 0.8969, "step": 7850 }, { "epoch": 0.69, "learning_rate": 4.656738579788628e-05, "loss": 0.9451, "step": 7860 }, { "epoch": 0.69, "learning_rate": 4.656301860424491e-05, "loss": 0.8773, "step": 7870 }, { "epoch": 0.69, "learning_rate": 4.655865141060355e-05, "loss": 0.8431, "step": 7880 }, { "epoch": 0.69, "learning_rate": 4.655428421696218e-05, "loss": 1.0478, "step": 7890 }, { "epoch": 0.69, "learning_rate": 4.6549917023320816e-05, "loss": 1.0526, "step": 7900 }, { "epoch": 0.69, "learning_rate": 4.6545549829679455e-05, "loss": 0.8925, "step": 7910 }, { "epoch": 0.69, "learning_rate": 4.654118263603808e-05, "loss": 0.9562, "step": 7920 }, { "epoch": 0.69, "learning_rate": 4.653681544239672e-05, "loss": 0.9244, "step": 7930 }, { "epoch": 0.69, "learning_rate": 4.653244824875535e-05, "loss": 0.9023, "step": 7940 }, { "epoch": 0.69, "learning_rate": 4.652808105511399e-05, "loss": 0.899, "step": 7950 }, { "epoch": 0.7, "learning_rate": 4.6523713861472615e-05, "loss": 0.8497, "step": 7960 }, { "epoch": 0.7, "learning_rate": 4.6519346667831255e-05, "loss": 0.9133, "step": 7970 }, { "epoch": 0.7, "learning_rate": 4.651497947418989e-05, "loss": 0.9295, "step": 7980 }, { "epoch": 0.7, "learning_rate": 4.651061228054852e-05, "loss": 1.0113, "step": 7990 }, { "epoch": 0.7, "learning_rate": 4.6506245086907154e-05, "loss": 0.9535, "step": 8000 }, { "epoch": 0.7, "learning_rate": 4.650187789326579e-05, "loss": 0.8836, "step": 8010 }, { "epoch": 0.7, "learning_rate": 4.649751069962443e-05, "loss": 0.9482, "step": 8020 }, { "epoch": 0.7, "learning_rate": 4.6493143505983054e-05, "loss": 0.9013, "step": 8030 }, { "epoch": 0.7, "learning_rate": 4.6488776312341694e-05, "loss": 1.0265, "step": 8040 }, { "epoch": 0.7, "learning_rate": 4.648440911870032e-05, "loss": 0.8934, "step": 8050 }, { "epoch": 0.7, "learning_rate": 4.648004192505896e-05, "loss": 1.0311, "step": 8060 }, { "epoch": 0.7, "learning_rate": 4.6475674731417594e-05, "loss": 0.828, "step": 8070 }, { "epoch": 0.71, "learning_rate": 4.647130753777623e-05, "loss": 0.9535, "step": 8080 }, { "epoch": 0.71, "learning_rate": 4.646694034413486e-05, "loss": 0.9586, "step": 8090 }, { "epoch": 0.71, "learning_rate": 4.646257315049349e-05, "loss": 0.8356, "step": 8100 }, { "epoch": 0.71, "learning_rate": 4.645820595685213e-05, "loss": 0.9981, "step": 8110 }, { "epoch": 0.71, "learning_rate": 4.645383876321076e-05, "loss": 0.9218, "step": 8120 }, { "epoch": 0.71, "learning_rate": 4.64494715695694e-05, "loss": 0.9265, "step": 8130 }, { "epoch": 0.71, "learning_rate": 4.644510437592803e-05, "loss": 0.9731, "step": 8140 }, { "epoch": 0.71, "learning_rate": 4.6440737182286666e-05, "loss": 0.9636, "step": 8150 }, { "epoch": 0.71, "learning_rate": 4.64363699886453e-05, "loss": 0.9791, "step": 8160 }, { "epoch": 0.71, "learning_rate": 4.643200279500393e-05, "loss": 0.9366, "step": 8170 }, { "epoch": 0.71, "learning_rate": 4.6427635601362566e-05, "loss": 0.9753, "step": 8180 }, { "epoch": 0.72, "learning_rate": 4.64232684077212e-05, "loss": 0.8684, "step": 8190 }, { "epoch": 0.72, "learning_rate": 4.641890121407983e-05, "loss": 1.0108, "step": 8200 }, { "epoch": 0.72, "learning_rate": 4.6414534020438465e-05, "loss": 0.9217, "step": 8210 }, { "epoch": 0.72, "learning_rate": 4.6410166826797105e-05, "loss": 0.9008, "step": 8220 }, { "epoch": 0.72, "learning_rate": 4.640579963315574e-05, "loss": 0.8441, "step": 8230 }, { "epoch": 0.72, "learning_rate": 4.640143243951437e-05, "loss": 0.8878, "step": 8240 }, { "epoch": 0.72, "learning_rate": 4.6397065245873005e-05, "loss": 0.9021, "step": 8250 }, { "epoch": 0.72, "learning_rate": 4.639269805223164e-05, "loss": 0.877, "step": 8260 }, { "epoch": 0.72, "learning_rate": 4.638833085859027e-05, "loss": 0.9297, "step": 8270 }, { "epoch": 0.72, "learning_rate": 4.6383963664948904e-05, "loss": 1.0486, "step": 8280 }, { "epoch": 0.72, "learning_rate": 4.637959647130754e-05, "loss": 0.8648, "step": 8290 }, { "epoch": 0.72, "learning_rate": 4.637522927766617e-05, "loss": 0.8761, "step": 8300 }, { "epoch": 0.73, "learning_rate": 4.637086208402481e-05, "loss": 0.88, "step": 8310 }, { "epoch": 0.73, "learning_rate": 4.6366494890383444e-05, "loss": 0.8634, "step": 8320 }, { "epoch": 0.73, "learning_rate": 4.636212769674208e-05, "loss": 0.9119, "step": 8330 }, { "epoch": 0.73, "learning_rate": 4.635776050310071e-05, "loss": 0.8945, "step": 8340 }, { "epoch": 0.73, "learning_rate": 4.6353393309459344e-05, "loss": 0.8123, "step": 8350 }, { "epoch": 0.73, "learning_rate": 4.634902611581798e-05, "loss": 0.9114, "step": 8360 }, { "epoch": 0.73, "learning_rate": 4.634465892217661e-05, "loss": 0.9065, "step": 8370 }, { "epoch": 0.73, "learning_rate": 4.634029172853524e-05, "loss": 0.7807, "step": 8380 }, { "epoch": 0.73, "learning_rate": 4.633592453489388e-05, "loss": 0.9842, "step": 8390 }, { "epoch": 0.73, "learning_rate": 4.633155734125251e-05, "loss": 0.9122, "step": 8400 }, { "epoch": 0.73, "learning_rate": 4.632719014761115e-05, "loss": 0.9702, "step": 8410 }, { "epoch": 0.74, "learning_rate": 4.632282295396978e-05, "loss": 1.0104, "step": 8420 }, { "epoch": 0.74, "learning_rate": 4.6318455760328416e-05, "loss": 1.0367, "step": 8430 }, { "epoch": 0.74, "learning_rate": 4.631408856668705e-05, "loss": 0.9515, "step": 8440 }, { "epoch": 0.74, "learning_rate": 4.630972137304568e-05, "loss": 0.9196, "step": 8450 }, { "epoch": 0.74, "learning_rate": 4.6305354179404316e-05, "loss": 0.9348, "step": 8460 }, { "epoch": 0.74, "learning_rate": 4.630098698576295e-05, "loss": 0.9261, "step": 8470 }, { "epoch": 0.74, "learning_rate": 4.629661979212159e-05, "loss": 0.8827, "step": 8480 }, { "epoch": 0.74, "learning_rate": 4.6292252598480215e-05, "loss": 0.8363, "step": 8490 }, { "epoch": 0.74, "learning_rate": 4.6287885404838855e-05, "loss": 0.9509, "step": 8500 }, { "epoch": 0.74, "learning_rate": 4.628351821119749e-05, "loss": 0.9425, "step": 8510 }, { "epoch": 0.74, "learning_rate": 4.627915101755612e-05, "loss": 1.0142, "step": 8520 }, { "epoch": 0.75, "learning_rate": 4.6274783823914755e-05, "loss": 0.9711, "step": 8530 }, { "epoch": 0.75, "learning_rate": 4.627041663027339e-05, "loss": 0.9595, "step": 8540 }, { "epoch": 0.75, "learning_rate": 4.626604943663203e-05, "loss": 0.9476, "step": 8550 }, { "epoch": 0.75, "learning_rate": 4.6261682242990654e-05, "loss": 0.9114, "step": 8560 }, { "epoch": 0.75, "learning_rate": 4.6257315049349294e-05, "loss": 0.9543, "step": 8570 }, { "epoch": 0.75, "learning_rate": 4.625294785570792e-05, "loss": 0.9228, "step": 8580 }, { "epoch": 0.75, "learning_rate": 4.624858066206656e-05, "loss": 0.9868, "step": 8590 }, { "epoch": 0.75, "learning_rate": 4.624421346842519e-05, "loss": 0.8871, "step": 8600 }, { "epoch": 0.75, "learning_rate": 4.623984627478383e-05, "loss": 0.8485, "step": 8610 }, { "epoch": 0.75, "learning_rate": 4.623547908114246e-05, "loss": 0.9434, "step": 8620 }, { "epoch": 0.75, "learning_rate": 4.6231111887501094e-05, "loss": 0.9924, "step": 8630 }, { "epoch": 0.75, "learning_rate": 4.6226744693859734e-05, "loss": 0.8118, "step": 8640 }, { "epoch": 0.76, "learning_rate": 4.622237750021836e-05, "loss": 0.9211, "step": 8650 }, { "epoch": 0.76, "learning_rate": 4.6218010306577e-05, "loss": 0.9975, "step": 8660 }, { "epoch": 0.76, "learning_rate": 4.6213643112935626e-05, "loss": 0.9707, "step": 8670 }, { "epoch": 0.76, "learning_rate": 4.6209275919294266e-05, "loss": 0.8622, "step": 8680 }, { "epoch": 0.76, "learning_rate": 4.620490872565289e-05, "loss": 1.022, "step": 8690 }, { "epoch": 0.76, "learning_rate": 4.620054153201153e-05, "loss": 0.9881, "step": 8700 }, { "epoch": 0.76, "learning_rate": 4.6196174338370166e-05, "loss": 0.8869, "step": 8710 }, { "epoch": 0.76, "learning_rate": 4.61918071447288e-05, "loss": 1.0154, "step": 8720 }, { "epoch": 0.76, "learning_rate": 4.618743995108743e-05, "loss": 0.9508, "step": 8730 }, { "epoch": 0.76, "learning_rate": 4.6183072757446066e-05, "loss": 0.8722, "step": 8740 }, { "epoch": 0.76, "learning_rate": 4.6178705563804706e-05, "loss": 0.9598, "step": 8750 }, { "epoch": 0.77, "learning_rate": 4.617433837016333e-05, "loss": 0.8668, "step": 8760 }, { "epoch": 0.77, "learning_rate": 4.616997117652197e-05, "loss": 0.9405, "step": 8770 }, { "epoch": 0.77, "learning_rate": 4.61656039828806e-05, "loss": 0.846, "step": 8780 }, { "epoch": 0.77, "learning_rate": 4.616123678923924e-05, "loss": 0.9587, "step": 8790 }, { "epoch": 0.77, "learning_rate": 4.615686959559787e-05, "loss": 0.8373, "step": 8800 }, { "epoch": 0.77, "learning_rate": 4.6152502401956505e-05, "loss": 0.8456, "step": 8810 }, { "epoch": 0.77, "learning_rate": 4.614813520831514e-05, "loss": 0.9896, "step": 8820 }, { "epoch": 0.77, "learning_rate": 4.614376801467377e-05, "loss": 1.0811, "step": 8830 }, { "epoch": 0.77, "learning_rate": 4.613940082103241e-05, "loss": 0.997, "step": 8840 }, { "epoch": 0.77, "learning_rate": 4.613503362739104e-05, "loss": 0.982, "step": 8850 }, { "epoch": 0.77, "learning_rate": 4.613066643374968e-05, "loss": 0.9422, "step": 8860 }, { "epoch": 0.77, "learning_rate": 4.6126299240108304e-05, "loss": 0.8499, "step": 8870 }, { "epoch": 0.78, "learning_rate": 4.6121932046466944e-05, "loss": 0.9511, "step": 8880 }, { "epoch": 0.78, "learning_rate": 4.611756485282558e-05, "loss": 0.8959, "step": 8890 }, { "epoch": 0.78, "learning_rate": 4.611319765918421e-05, "loss": 0.8872, "step": 8900 }, { "epoch": 0.78, "learning_rate": 4.6108830465542844e-05, "loss": 0.9708, "step": 8910 }, { "epoch": 0.78, "learning_rate": 4.610446327190148e-05, "loss": 0.8085, "step": 8920 }, { "epoch": 0.78, "learning_rate": 4.610009607826011e-05, "loss": 0.9264, "step": 8930 }, { "epoch": 0.78, "learning_rate": 4.609572888461874e-05, "loss": 0.8876, "step": 8940 }, { "epoch": 0.78, "learning_rate": 4.609136169097738e-05, "loss": 1.0197, "step": 8950 }, { "epoch": 0.78, "learning_rate": 4.6086994497336016e-05, "loss": 1.0333, "step": 8960 }, { "epoch": 0.78, "learning_rate": 4.608262730369465e-05, "loss": 0.8699, "step": 8970 }, { "epoch": 0.78, "learning_rate": 4.607826011005328e-05, "loss": 0.9157, "step": 8980 }, { "epoch": 0.79, "learning_rate": 4.6073892916411916e-05, "loss": 0.8753, "step": 8990 }, { "epoch": 0.79, "learning_rate": 4.606952572277055e-05, "loss": 0.9993, "step": 9000 }, { "epoch": 0.79, "learning_rate": 4.606515852912918e-05, "loss": 0.8956, "step": 9010 }, { "epoch": 0.79, "learning_rate": 4.6060791335487816e-05, "loss": 0.9916, "step": 9020 }, { "epoch": 0.79, "learning_rate": 4.605642414184645e-05, "loss": 0.8797, "step": 9030 }, { "epoch": 0.79, "learning_rate": 4.605205694820509e-05, "loss": 0.9454, "step": 9040 }, { "epoch": 0.79, "learning_rate": 4.604768975456372e-05, "loss": 0.8976, "step": 9050 }, { "epoch": 0.79, "learning_rate": 4.6043322560922355e-05, "loss": 0.8632, "step": 9060 }, { "epoch": 0.79, "learning_rate": 4.603895536728099e-05, "loss": 0.8919, "step": 9070 }, { "epoch": 0.79, "learning_rate": 4.603458817363962e-05, "loss": 0.791, "step": 9080 }, { "epoch": 0.79, "learning_rate": 4.6030220979998255e-05, "loss": 0.824, "step": 9090 }, { "epoch": 0.79, "learning_rate": 4.602585378635689e-05, "loss": 1.0017, "step": 9100 }, { "epoch": 0.8, "learning_rate": 4.602148659271552e-05, "loss": 0.909, "step": 9110 }, { "epoch": 0.8, "learning_rate": 4.6017119399074154e-05, "loss": 0.9327, "step": 9120 }, { "epoch": 0.8, "learning_rate": 4.601275220543279e-05, "loss": 0.9564, "step": 9130 }, { "epoch": 0.8, "learning_rate": 4.600838501179143e-05, "loss": 0.9171, "step": 9140 }, { "epoch": 0.8, "learning_rate": 4.600401781815006e-05, "loss": 1.0033, "step": 9150 }, { "epoch": 0.8, "learning_rate": 4.5999650624508694e-05, "loss": 0.8151, "step": 9160 }, { "epoch": 0.8, "learning_rate": 4.599528343086733e-05, "loss": 0.9367, "step": 9170 }, { "epoch": 0.8, "learning_rate": 4.599091623722596e-05, "loss": 0.7902, "step": 9180 }, { "epoch": 0.8, "learning_rate": 4.5986549043584594e-05, "loss": 0.8708, "step": 9190 }, { "epoch": 0.8, "learning_rate": 4.598218184994323e-05, "loss": 0.7969, "step": 9200 }, { "epoch": 0.8, "learning_rate": 4.597781465630187e-05, "loss": 1.0689, "step": 9210 }, { "epoch": 0.81, "learning_rate": 4.597344746266049e-05, "loss": 1.0747, "step": 9220 }, { "epoch": 0.81, "learning_rate": 4.596908026901913e-05, "loss": 0.9492, "step": 9230 }, { "epoch": 0.81, "learning_rate": 4.5964713075377766e-05, "loss": 0.9497, "step": 9240 }, { "epoch": 0.81, "learning_rate": 4.59603458817364e-05, "loss": 0.9191, "step": 9250 }, { "epoch": 0.81, "learning_rate": 4.595597868809503e-05, "loss": 0.8723, "step": 9260 }, { "epoch": 0.81, "learning_rate": 4.5951611494453666e-05, "loss": 0.9379, "step": 9270 }, { "epoch": 0.81, "learning_rate": 4.59472443008123e-05, "loss": 0.9149, "step": 9280 }, { "epoch": 0.81, "learning_rate": 4.594287710717093e-05, "loss": 0.9203, "step": 9290 }, { "epoch": 0.81, "learning_rate": 4.593850991352957e-05, "loss": 0.8775, "step": 9300 }, { "epoch": 0.81, "learning_rate": 4.59341427198882e-05, "loss": 0.8603, "step": 9310 }, { "epoch": 0.81, "learning_rate": 4.592977552624684e-05, "loss": 0.978, "step": 9320 }, { "epoch": 0.81, "learning_rate": 4.5925408332605465e-05, "loss": 0.8297, "step": 9330 }, { "epoch": 0.82, "learning_rate": 4.5921041138964105e-05, "loss": 0.797, "step": 9340 }, { "epoch": 0.82, "learning_rate": 4.591667394532274e-05, "loss": 0.8891, "step": 9350 }, { "epoch": 0.82, "learning_rate": 4.591230675168137e-05, "loss": 0.9431, "step": 9360 }, { "epoch": 0.82, "learning_rate": 4.590793955804001e-05, "loss": 0.8584, "step": 9370 }, { "epoch": 0.82, "learning_rate": 4.590357236439864e-05, "loss": 0.9427, "step": 9380 }, { "epoch": 0.82, "learning_rate": 4.589920517075728e-05, "loss": 0.8603, "step": 9390 }, { "epoch": 0.82, "learning_rate": 4.5894837977115904e-05, "loss": 0.8461, "step": 9400 }, { "epoch": 0.82, "learning_rate": 4.5890470783474544e-05, "loss": 0.9328, "step": 9410 }, { "epoch": 0.82, "learning_rate": 4.588610358983317e-05, "loss": 0.9883, "step": 9420 }, { "epoch": 0.82, "learning_rate": 4.588173639619181e-05, "loss": 0.9322, "step": 9430 }, { "epoch": 0.82, "learning_rate": 4.5877369202550444e-05, "loss": 0.8851, "step": 9440 }, { "epoch": 0.83, "learning_rate": 4.587300200890908e-05, "loss": 0.8376, "step": 9450 }, { "epoch": 0.83, "learning_rate": 4.586863481526771e-05, "loss": 0.8978, "step": 9460 }, { "epoch": 0.83, "learning_rate": 4.5864267621626344e-05, "loss": 1.0406, "step": 9470 }, { "epoch": 0.83, "learning_rate": 4.5859900427984984e-05, "loss": 0.9252, "step": 9480 }, { "epoch": 0.83, "learning_rate": 4.585553323434361e-05, "loss": 0.8589, "step": 9490 }, { "epoch": 0.83, "learning_rate": 4.585116604070225e-05, "loss": 0.9067, "step": 9500 }, { "epoch": 0.83, "learning_rate": 4.5846798847060876e-05, "loss": 0.967, "step": 9510 }, { "epoch": 0.83, "learning_rate": 4.5842431653419516e-05, "loss": 0.9374, "step": 9520 }, { "epoch": 0.83, "learning_rate": 4.583806445977814e-05, "loss": 0.8765, "step": 9530 }, { "epoch": 0.83, "learning_rate": 4.583369726613678e-05, "loss": 0.9199, "step": 9540 }, { "epoch": 0.83, "learning_rate": 4.5829330072495416e-05, "loss": 0.8998, "step": 9550 }, { "epoch": 0.84, "learning_rate": 4.582496287885405e-05, "loss": 0.8998, "step": 9560 }, { "epoch": 0.84, "learning_rate": 4.582059568521269e-05, "loss": 0.8978, "step": 9570 }, { "epoch": 0.84, "learning_rate": 4.5816228491571316e-05, "loss": 0.8726, "step": 9580 }, { "epoch": 0.84, "learning_rate": 4.5811861297929956e-05, "loss": 0.7637, "step": 9590 }, { "epoch": 0.84, "learning_rate": 4.580749410428858e-05, "loss": 0.9338, "step": 9600 }, { "epoch": 0.84, "learning_rate": 4.580312691064722e-05, "loss": 0.9755, "step": 9610 }, { "epoch": 0.84, "learning_rate": 4.5798759717005855e-05, "loss": 0.9454, "step": 9620 }, { "epoch": 0.84, "learning_rate": 4.579439252336449e-05, "loss": 0.9308, "step": 9630 }, { "epoch": 0.84, "learning_rate": 4.579002532972312e-05, "loss": 0.963, "step": 9640 }, { "epoch": 0.84, "learning_rate": 4.5785658136081755e-05, "loss": 0.9523, "step": 9650 }, { "epoch": 0.84, "learning_rate": 4.578129094244039e-05, "loss": 0.8774, "step": 9660 }, { "epoch": 0.84, "learning_rate": 4.577692374879902e-05, "loss": 0.9609, "step": 9670 }, { "epoch": 0.85, "learning_rate": 4.577255655515766e-05, "loss": 0.8705, "step": 9680 }, { "epoch": 0.85, "learning_rate": 4.576818936151629e-05, "loss": 0.917, "step": 9690 }, { "epoch": 0.85, "learning_rate": 4.576382216787493e-05, "loss": 0.8363, "step": 9700 }, { "epoch": 0.85, "learning_rate": 4.575945497423356e-05, "loss": 1.0622, "step": 9710 }, { "epoch": 0.85, "learning_rate": 4.5755087780592194e-05, "loss": 0.8973, "step": 9720 }, { "epoch": 0.85, "learning_rate": 4.575072058695083e-05, "loss": 0.9617, "step": 9730 }, { "epoch": 0.85, "learning_rate": 4.574635339330946e-05, "loss": 0.8569, "step": 9740 }, { "epoch": 0.85, "learning_rate": 4.5741986199668094e-05, "loss": 0.8894, "step": 9750 }, { "epoch": 0.85, "learning_rate": 4.573761900602673e-05, "loss": 0.978, "step": 9760 }, { "epoch": 0.85, "learning_rate": 4.573325181238537e-05, "loss": 0.8624, "step": 9770 }, { "epoch": 0.85, "learning_rate": 4.572888461874399e-05, "loss": 0.979, "step": 9780 }, { "epoch": 0.86, "learning_rate": 4.572451742510263e-05, "loss": 0.9463, "step": 9790 }, { "epoch": 0.86, "learning_rate": 4.5720150231461266e-05, "loss": 1.0555, "step": 9800 }, { "epoch": 0.86, "learning_rate": 4.57157830378199e-05, "loss": 0.8618, "step": 9810 }, { "epoch": 0.86, "learning_rate": 4.571141584417853e-05, "loss": 0.8996, "step": 9820 }, { "epoch": 0.86, "learning_rate": 4.5707048650537166e-05, "loss": 0.8343, "step": 9830 }, { "epoch": 0.86, "learning_rate": 4.57026814568958e-05, "loss": 0.9402, "step": 9840 }, { "epoch": 0.86, "learning_rate": 4.569831426325443e-05, "loss": 0.9295, "step": 9850 }, { "epoch": 0.86, "learning_rate": 4.5693947069613066e-05, "loss": 1.0123, "step": 9860 }, { "epoch": 0.86, "learning_rate": 4.5689579875971706e-05, "loss": 0.9938, "step": 9870 }, { "epoch": 0.86, "learning_rate": 4.568521268233034e-05, "loss": 0.8626, "step": 9880 }, { "epoch": 0.86, "learning_rate": 4.568084548868897e-05, "loss": 0.9466, "step": 9890 }, { "epoch": 0.86, "learning_rate": 4.5676478295047605e-05, "loss": 0.858, "step": 9900 }, { "epoch": 0.87, "learning_rate": 4.567211110140624e-05, "loss": 0.765, "step": 9910 }, { "epoch": 0.87, "learning_rate": 4.566774390776487e-05, "loss": 0.9197, "step": 9920 }, { "epoch": 0.87, "learning_rate": 4.5663376714123505e-05, "loss": 0.8931, "step": 9930 }, { "epoch": 0.87, "learning_rate": 4.565900952048214e-05, "loss": 1.001, "step": 9940 }, { "epoch": 0.87, "learning_rate": 4.565464232684077e-05, "loss": 0.8776, "step": 9950 }, { "epoch": 0.87, "learning_rate": 4.565027513319941e-05, "loss": 0.8704, "step": 9960 }, { "epoch": 0.87, "learning_rate": 4.5645907939558044e-05, "loss": 0.8803, "step": 9970 }, { "epoch": 0.87, "learning_rate": 4.564154074591668e-05, "loss": 0.9628, "step": 9980 }, { "epoch": 0.87, "learning_rate": 4.563717355227531e-05, "loss": 0.8756, "step": 9990 }, { "epoch": 0.87, "learning_rate": 4.5632806358633944e-05, "loss": 0.9446, "step": 10000 }, { "epoch": 0.87, "eval_accuracy": 0.5734499361304903, "eval_loss": 0.9110677242279053, "eval_runtime": 84.0697, "eval_samples_per_second": 121.054, "eval_steps_per_second": 15.142, "step": 10000 }, { "epoch": 0.87, "learning_rate": 4.562843916499258e-05, "loss": 0.9811, "step": 10010 }, { "epoch": 0.88, "learning_rate": 4.562407197135121e-05, "loss": 0.831, "step": 10020 }, { "epoch": 0.88, "learning_rate": 4.561970477770985e-05, "loss": 0.759, "step": 10030 }, { "epoch": 0.88, "learning_rate": 4.561533758406848e-05, "loss": 1.0103, "step": 10040 }, { "epoch": 0.88, "learning_rate": 4.561097039042712e-05, "loss": 0.8994, "step": 10050 }, { "epoch": 0.88, "learning_rate": 4.560660319678574e-05, "loss": 0.8891, "step": 10060 }, { "epoch": 0.88, "learning_rate": 4.560223600314438e-05, "loss": 0.9094, "step": 10070 }, { "epoch": 0.88, "learning_rate": 4.5597868809503016e-05, "loss": 0.9042, "step": 10080 }, { "epoch": 0.88, "learning_rate": 4.559350161586165e-05, "loss": 0.9048, "step": 10090 }, { "epoch": 0.88, "learning_rate": 4.558913442222028e-05, "loss": 0.879, "step": 10100 }, { "epoch": 0.88, "learning_rate": 4.5584767228578916e-05, "loss": 0.9188, "step": 10110 }, { "epoch": 0.88, "learning_rate": 4.5580400034937556e-05, "loss": 0.8391, "step": 10120 }, { "epoch": 0.88, "learning_rate": 4.557603284129618e-05, "loss": 0.7726, "step": 10130 }, { "epoch": 0.89, "learning_rate": 4.557166564765482e-05, "loss": 0.9201, "step": 10140 }, { "epoch": 0.89, "learning_rate": 4.556729845401345e-05, "loss": 1.0287, "step": 10150 }, { "epoch": 0.89, "learning_rate": 4.556293126037209e-05, "loss": 0.9263, "step": 10160 }, { "epoch": 0.89, "learning_rate": 4.555856406673072e-05, "loss": 0.8906, "step": 10170 }, { "epoch": 0.89, "learning_rate": 4.5554196873089355e-05, "loss": 0.9107, "step": 10180 }, { "epoch": 0.89, "learning_rate": 4.554982967944799e-05, "loss": 0.9614, "step": 10190 }, { "epoch": 0.89, "learning_rate": 4.554546248580662e-05, "loss": 0.9123, "step": 10200 }, { "epoch": 0.89, "learning_rate": 4.554109529216526e-05, "loss": 0.8693, "step": 10210 }, { "epoch": 0.89, "learning_rate": 4.553672809852389e-05, "loss": 0.8381, "step": 10220 }, { "epoch": 0.89, "learning_rate": 4.553236090488253e-05, "loss": 0.9337, "step": 10230 }, { "epoch": 0.89, "learning_rate": 4.5527993711241154e-05, "loss": 0.9651, "step": 10240 }, { "epoch": 0.9, "learning_rate": 4.5523626517599794e-05, "loss": 0.9808, "step": 10250 }, { "epoch": 0.9, "learning_rate": 4.551925932395842e-05, "loss": 1.0326, "step": 10260 }, { "epoch": 0.9, "learning_rate": 4.551489213031706e-05, "loss": 0.8245, "step": 10270 }, { "epoch": 0.9, "learning_rate": 4.5510524936675694e-05, "loss": 0.814, "step": 10280 }, { "epoch": 0.9, "learning_rate": 4.550615774303433e-05, "loss": 0.9873, "step": 10290 }, { "epoch": 0.9, "learning_rate": 4.550179054939297e-05, "loss": 0.9243, "step": 10300 }, { "epoch": 0.9, "learning_rate": 4.5497423355751594e-05, "loss": 0.9284, "step": 10310 }, { "epoch": 0.9, "learning_rate": 4.5493056162110234e-05, "loss": 1.0906, "step": 10320 }, { "epoch": 0.9, "learning_rate": 4.548868896846886e-05, "loss": 0.8104, "step": 10330 }, { "epoch": 0.9, "learning_rate": 4.54843217748275e-05, "loss": 0.9079, "step": 10340 }, { "epoch": 0.9, "learning_rate": 4.5479954581186126e-05, "loss": 1.0131, "step": 10350 }, { "epoch": 0.9, "learning_rate": 4.5475587387544766e-05, "loss": 0.7866, "step": 10360 }, { "epoch": 0.91, "learning_rate": 4.54712201939034e-05, "loss": 1.0117, "step": 10370 }, { "epoch": 0.91, "learning_rate": 4.546685300026203e-05, "loss": 0.8584, "step": 10380 }, { "epoch": 0.91, "learning_rate": 4.5462485806620666e-05, "loss": 1.0829, "step": 10390 }, { "epoch": 0.91, "learning_rate": 4.54581186129793e-05, "loss": 0.9519, "step": 10400 }, { "epoch": 0.91, "learning_rate": 4.545375141933794e-05, "loss": 0.9963, "step": 10410 }, { "epoch": 0.91, "learning_rate": 4.5449384225696566e-05, "loss": 1.0018, "step": 10420 }, { "epoch": 0.91, "learning_rate": 4.5445017032055206e-05, "loss": 0.9403, "step": 10430 }, { "epoch": 0.91, "learning_rate": 4.544064983841384e-05, "loss": 0.8824, "step": 10440 }, { "epoch": 0.91, "learning_rate": 4.543628264477247e-05, "loss": 0.9186, "step": 10450 }, { "epoch": 0.91, "learning_rate": 4.5431915451131105e-05, "loss": 0.8281, "step": 10460 }, { "epoch": 0.91, "learning_rate": 4.542754825748974e-05, "loss": 0.9951, "step": 10470 }, { "epoch": 0.92, "learning_rate": 4.542318106384837e-05, "loss": 0.9504, "step": 10480 }, { "epoch": 0.92, "learning_rate": 4.5418813870207005e-05, "loss": 0.8944, "step": 10490 }, { "epoch": 0.92, "learning_rate": 4.5414446676565645e-05, "loss": 0.8705, "step": 10500 }, { "epoch": 0.92, "learning_rate": 4.541007948292427e-05, "loss": 0.8399, "step": 10510 }, { "epoch": 0.92, "learning_rate": 4.540571228928291e-05, "loss": 0.9143, "step": 10520 }, { "epoch": 0.92, "learning_rate": 4.5401345095641544e-05, "loss": 0.9148, "step": 10530 }, { "epoch": 0.92, "learning_rate": 4.539697790200018e-05, "loss": 0.8801, "step": 10540 }, { "epoch": 0.92, "learning_rate": 4.539261070835881e-05, "loss": 0.9361, "step": 10550 }, { "epoch": 0.92, "learning_rate": 4.5388243514717444e-05, "loss": 0.9047, "step": 10560 }, { "epoch": 0.92, "learning_rate": 4.538387632107608e-05, "loss": 0.8674, "step": 10570 }, { "epoch": 0.92, "learning_rate": 4.537950912743471e-05, "loss": 0.8867, "step": 10580 }, { "epoch": 0.92, "learning_rate": 4.5375141933793344e-05, "loss": 0.9204, "step": 10590 }, { "epoch": 0.93, "learning_rate": 4.537077474015198e-05, "loss": 0.9896, "step": 10600 }, { "epoch": 0.93, "learning_rate": 4.536640754651062e-05, "loss": 0.9175, "step": 10610 }, { "epoch": 0.93, "learning_rate": 4.536204035286925e-05, "loss": 0.8352, "step": 10620 }, { "epoch": 0.93, "learning_rate": 4.535767315922788e-05, "loss": 0.9144, "step": 10630 }, { "epoch": 0.93, "learning_rate": 4.5353305965586516e-05, "loss": 0.9773, "step": 10640 }, { "epoch": 0.93, "learning_rate": 4.534893877194515e-05, "loss": 0.9161, "step": 10650 }, { "epoch": 0.93, "learning_rate": 4.534457157830378e-05, "loss": 0.8555, "step": 10660 }, { "epoch": 0.93, "learning_rate": 4.5340204384662416e-05, "loss": 0.8726, "step": 10670 }, { "epoch": 0.93, "learning_rate": 4.533583719102105e-05, "loss": 0.9121, "step": 10680 }, { "epoch": 0.93, "learning_rate": 4.533146999737969e-05, "loss": 0.8925, "step": 10690 }, { "epoch": 0.93, "learning_rate": 4.532710280373832e-05, "loss": 0.978, "step": 10700 }, { "epoch": 0.94, "learning_rate": 4.5322735610096956e-05, "loss": 0.8737, "step": 10710 }, { "epoch": 0.94, "learning_rate": 4.531836841645559e-05, "loss": 0.8594, "step": 10720 }, { "epoch": 0.94, "learning_rate": 4.531400122281422e-05, "loss": 0.836, "step": 10730 }, { "epoch": 0.94, "learning_rate": 4.5309634029172855e-05, "loss": 0.9575, "step": 10740 }, { "epoch": 0.94, "learning_rate": 4.530526683553149e-05, "loss": 0.8756, "step": 10750 }, { "epoch": 0.94, "learning_rate": 4.530089964189012e-05, "loss": 0.9887, "step": 10760 }, { "epoch": 0.94, "learning_rate": 4.5296532448248755e-05, "loss": 0.8885, "step": 10770 }, { "epoch": 0.94, "learning_rate": 4.5292165254607395e-05, "loss": 0.9223, "step": 10780 }, { "epoch": 0.94, "learning_rate": 4.528779806096603e-05, "loss": 0.9529, "step": 10790 }, { "epoch": 0.94, "learning_rate": 4.528343086732466e-05, "loss": 0.8108, "step": 10800 }, { "epoch": 0.94, "learning_rate": 4.5279063673683294e-05, "loss": 0.9745, "step": 10810 }, { "epoch": 0.95, "learning_rate": 4.527469648004193e-05, "loss": 0.8772, "step": 10820 }, { "epoch": 0.95, "learning_rate": 4.527032928640056e-05, "loss": 0.875, "step": 10830 }, { "epoch": 0.95, "learning_rate": 4.5265962092759194e-05, "loss": 0.9734, "step": 10840 }, { "epoch": 0.95, "learning_rate": 4.5261594899117834e-05, "loss": 0.9213, "step": 10850 }, { "epoch": 0.95, "learning_rate": 4.525722770547646e-05, "loss": 0.9138, "step": 10860 }, { "epoch": 0.95, "learning_rate": 4.52528605118351e-05, "loss": 0.9242, "step": 10870 }, { "epoch": 0.95, "learning_rate": 4.524849331819373e-05, "loss": 0.9694, "step": 10880 }, { "epoch": 0.95, "learning_rate": 4.524412612455237e-05, "loss": 0.9103, "step": 10890 }, { "epoch": 0.95, "learning_rate": 4.5239758930911e-05, "loss": 0.8588, "step": 10900 }, { "epoch": 0.95, "learning_rate": 4.523539173726963e-05, "loss": 0.9302, "step": 10910 }, { "epoch": 0.95, "learning_rate": 4.5231024543628266e-05, "loss": 0.9684, "step": 10920 }, { "epoch": 0.95, "learning_rate": 4.52266573499869e-05, "loss": 0.8293, "step": 10930 }, { "epoch": 0.96, "learning_rate": 4.522229015634554e-05, "loss": 0.9172, "step": 10940 }, { "epoch": 0.96, "learning_rate": 4.5217922962704166e-05, "loss": 0.9415, "step": 10950 }, { "epoch": 0.96, "learning_rate": 4.5213555769062806e-05, "loss": 0.9137, "step": 10960 }, { "epoch": 0.96, "learning_rate": 4.520918857542143e-05, "loss": 0.9424, "step": 10970 }, { "epoch": 0.96, "learning_rate": 4.520482138178007e-05, "loss": 0.8267, "step": 10980 }, { "epoch": 0.96, "learning_rate": 4.5200454188138706e-05, "loss": 0.8788, "step": 10990 }, { "epoch": 0.96, "learning_rate": 4.519608699449734e-05, "loss": 0.9256, "step": 11000 }, { "epoch": 0.96, "learning_rate": 4.519171980085597e-05, "loss": 1.0088, "step": 11010 }, { "epoch": 0.96, "learning_rate": 4.5187352607214605e-05, "loss": 0.9595, "step": 11020 }, { "epoch": 0.96, "learning_rate": 4.5182985413573245e-05, "loss": 0.9277, "step": 11030 }, { "epoch": 0.96, "learning_rate": 4.517861821993187e-05, "loss": 0.9578, "step": 11040 }, { "epoch": 0.97, "learning_rate": 4.517425102629051e-05, "loss": 0.9886, "step": 11050 }, { "epoch": 0.97, "learning_rate": 4.516988383264914e-05, "loss": 1.0115, "step": 11060 }, { "epoch": 0.97, "learning_rate": 4.516551663900778e-05, "loss": 1.0093, "step": 11070 }, { "epoch": 0.97, "learning_rate": 4.5161149445366404e-05, "loss": 0.889, "step": 11080 }, { "epoch": 0.97, "learning_rate": 4.5156782251725044e-05, "loss": 0.8705, "step": 11090 }, { "epoch": 0.97, "learning_rate": 4.515241505808368e-05, "loss": 0.9113, "step": 11100 }, { "epoch": 0.97, "learning_rate": 4.514804786444231e-05, "loss": 0.8333, "step": 11110 }, { "epoch": 0.97, "learning_rate": 4.514368067080095e-05, "loss": 0.8226, "step": 11120 }, { "epoch": 0.97, "learning_rate": 4.513931347715958e-05, "loss": 0.9597, "step": 11130 }, { "epoch": 0.97, "learning_rate": 4.513494628351822e-05, "loss": 0.9097, "step": 11140 }, { "epoch": 0.97, "learning_rate": 4.5130579089876844e-05, "loss": 0.8062, "step": 11150 }, { "epoch": 0.97, "learning_rate": 4.5126211896235484e-05, "loss": 0.8212, "step": 11160 }, { "epoch": 0.98, "learning_rate": 4.512184470259411e-05, "loss": 0.887, "step": 11170 }, { "epoch": 0.98, "learning_rate": 4.511747750895275e-05, "loss": 1.0557, "step": 11180 }, { "epoch": 0.98, "learning_rate": 4.511311031531138e-05, "loss": 0.9703, "step": 11190 }, { "epoch": 0.98, "learning_rate": 4.5108743121670016e-05, "loss": 0.9554, "step": 11200 }, { "epoch": 0.98, "learning_rate": 4.510437592802865e-05, "loss": 1.0286, "step": 11210 }, { "epoch": 0.98, "learning_rate": 4.510000873438728e-05, "loss": 1.0108, "step": 11220 }, { "epoch": 0.98, "learning_rate": 4.509564154074592e-05, "loss": 0.8789, "step": 11230 }, { "epoch": 0.98, "learning_rate": 4.509127434710455e-05, "loss": 0.9179, "step": 11240 }, { "epoch": 0.98, "learning_rate": 4.508690715346319e-05, "loss": 0.9582, "step": 11250 }, { "epoch": 0.98, "learning_rate": 4.508253995982182e-05, "loss": 0.9739, "step": 11260 }, { "epoch": 0.98, "learning_rate": 4.5078172766180456e-05, "loss": 0.8952, "step": 11270 }, { "epoch": 0.99, "learning_rate": 4.507380557253909e-05, "loss": 0.8655, "step": 11280 }, { "epoch": 0.99, "learning_rate": 4.506943837889772e-05, "loss": 0.9757, "step": 11290 }, { "epoch": 0.99, "learning_rate": 4.5065071185256355e-05, "loss": 0.9027, "step": 11300 }, { "epoch": 0.99, "learning_rate": 4.506070399161499e-05, "loss": 0.931, "step": 11310 }, { "epoch": 0.99, "learning_rate": 4.505633679797363e-05, "loss": 0.9226, "step": 11320 }, { "epoch": 0.99, "learning_rate": 4.5051969604332255e-05, "loss": 0.9397, "step": 11330 }, { "epoch": 0.99, "learning_rate": 4.5047602410690895e-05, "loss": 0.9259, "step": 11340 }, { "epoch": 0.99, "learning_rate": 4.504323521704953e-05, "loss": 1.0322, "step": 11350 }, { "epoch": 0.99, "learning_rate": 4.503886802340816e-05, "loss": 0.8922, "step": 11360 }, { "epoch": 0.99, "learning_rate": 4.5034500829766794e-05, "loss": 0.8602, "step": 11370 }, { "epoch": 0.99, "learning_rate": 4.503013363612543e-05, "loss": 0.9701, "step": 11380 }, { "epoch": 0.99, "learning_rate": 4.502576644248406e-05, "loss": 0.96, "step": 11390 }, { "epoch": 1.0, "learning_rate": 4.5021399248842694e-05, "loss": 0.8228, "step": 11400 }, { "epoch": 1.0, "learning_rate": 4.501703205520133e-05, "loss": 1.0048, "step": 11410 }, { "epoch": 1.0, "learning_rate": 4.501266486155996e-05, "loss": 0.9371, "step": 11420 }, { "epoch": 1.0, "learning_rate": 4.50082976679186e-05, "loss": 0.9576, "step": 11430 }, { "epoch": 1.0, "learning_rate": 4.5003930474277234e-05, "loss": 0.9371, "step": 11440 }, { "epoch": 1.0, "learning_rate": 4.499956328063587e-05, "loss": 0.9208, "step": 11450 }, { "epoch": 1.0, "learning_rate": 4.49951960869945e-05, "loss": 0.8842, "step": 11460 }, { "epoch": 1.0, "learning_rate": 4.499082889335313e-05, "loss": 0.9082, "step": 11470 }, { "epoch": 1.0, "learning_rate": 4.4986461699711766e-05, "loss": 0.9201, "step": 11480 }, { "epoch": 1.0, "learning_rate": 4.49820945060704e-05, "loss": 0.9304, "step": 11490 }, { "epoch": 1.0, "learning_rate": 4.497772731242903e-05, "loss": 0.826, "step": 11500 }, { "epoch": 1.01, "learning_rate": 4.497336011878767e-05, "loss": 0.958, "step": 11510 }, { "epoch": 1.01, "learning_rate": 4.4968992925146306e-05, "loss": 0.9367, "step": 11520 }, { "epoch": 1.01, "learning_rate": 4.496462573150494e-05, "loss": 0.8309, "step": 11530 }, { "epoch": 1.01, "learning_rate": 4.496025853786357e-05, "loss": 0.8894, "step": 11540 }, { "epoch": 1.01, "learning_rate": 4.4955891344222206e-05, "loss": 0.882, "step": 11550 }, { "epoch": 1.01, "learning_rate": 4.495152415058084e-05, "loss": 1.0092, "step": 11560 }, { "epoch": 1.01, "learning_rate": 4.494715695693947e-05, "loss": 0.8797, "step": 11570 }, { "epoch": 1.01, "learning_rate": 4.4942789763298105e-05, "loss": 0.9061, "step": 11580 }, { "epoch": 1.01, "learning_rate": 4.493842256965674e-05, "loss": 0.9299, "step": 11590 }, { "epoch": 1.01, "learning_rate": 4.493405537601538e-05, "loss": 0.8721, "step": 11600 }, { "epoch": 1.01, "learning_rate": 4.4929688182374005e-05, "loss": 0.8272, "step": 11610 }, { "epoch": 1.01, "learning_rate": 4.4925320988732645e-05, "loss": 0.9526, "step": 11620 }, { "epoch": 1.02, "learning_rate": 4.492095379509128e-05, "loss": 0.9967, "step": 11630 }, { "epoch": 1.02, "learning_rate": 4.491658660144991e-05, "loss": 0.9281, "step": 11640 }, { "epoch": 1.02, "learning_rate": 4.4912219407808544e-05, "loss": 0.8985, "step": 11650 }, { "epoch": 1.02, "learning_rate": 4.490785221416718e-05, "loss": 1.0259, "step": 11660 }, { "epoch": 1.02, "learning_rate": 4.490348502052581e-05, "loss": 0.9732, "step": 11670 }, { "epoch": 1.02, "learning_rate": 4.4899117826884444e-05, "loss": 0.9151, "step": 11680 }, { "epoch": 1.02, "learning_rate": 4.4894750633243084e-05, "loss": 0.9595, "step": 11690 }, { "epoch": 1.02, "learning_rate": 4.489038343960171e-05, "loss": 0.9329, "step": 11700 }, { "epoch": 1.02, "learning_rate": 4.488601624596035e-05, "loss": 0.9071, "step": 11710 }, { "epoch": 1.02, "learning_rate": 4.4881649052318984e-05, "loss": 0.9468, "step": 11720 }, { "epoch": 1.02, "learning_rate": 4.487728185867762e-05, "loss": 0.9363, "step": 11730 }, { "epoch": 1.03, "learning_rate": 4.487291466503625e-05, "loss": 0.8816, "step": 11740 }, { "epoch": 1.03, "learning_rate": 4.486854747139488e-05, "loss": 0.7856, "step": 11750 }, { "epoch": 1.03, "learning_rate": 4.486418027775352e-05, "loss": 0.8097, "step": 11760 }, { "epoch": 1.03, "learning_rate": 4.485981308411215e-05, "loss": 0.8906, "step": 11770 }, { "epoch": 1.03, "learning_rate": 4.485544589047079e-05, "loss": 0.9801, "step": 11780 }, { "epoch": 1.03, "learning_rate": 4.4851078696829416e-05, "loss": 0.9034, "step": 11790 }, { "epoch": 1.03, "learning_rate": 4.4846711503188056e-05, "loss": 0.8384, "step": 11800 }, { "epoch": 1.03, "learning_rate": 4.484234430954668e-05, "loss": 0.8314, "step": 11810 }, { "epoch": 1.03, "learning_rate": 4.483797711590532e-05, "loss": 0.9133, "step": 11820 }, { "epoch": 1.03, "learning_rate": 4.4833609922263956e-05, "loss": 1.032, "step": 11830 }, { "epoch": 1.03, "learning_rate": 4.482924272862259e-05, "loss": 0.8315, "step": 11840 }, { "epoch": 1.04, "learning_rate": 4.482487553498123e-05, "loss": 0.9895, "step": 11850 }, { "epoch": 1.04, "learning_rate": 4.4820508341339855e-05, "loss": 0.9437, "step": 11860 }, { "epoch": 1.04, "learning_rate": 4.4816141147698495e-05, "loss": 0.8944, "step": 11870 }, { "epoch": 1.04, "learning_rate": 4.481177395405712e-05, "loss": 0.9036, "step": 11880 }, { "epoch": 1.04, "learning_rate": 4.480740676041576e-05, "loss": 0.8344, "step": 11890 }, { "epoch": 1.04, "learning_rate": 4.480303956677439e-05, "loss": 1.0339, "step": 11900 }, { "epoch": 1.04, "learning_rate": 4.479867237313303e-05, "loss": 0.8336, "step": 11910 }, { "epoch": 1.04, "learning_rate": 4.479430517949166e-05, "loss": 0.9073, "step": 11920 }, { "epoch": 1.04, "learning_rate": 4.4789937985850294e-05, "loss": 0.9492, "step": 11930 }, { "epoch": 1.04, "learning_rate": 4.478557079220893e-05, "loss": 0.956, "step": 11940 }, { "epoch": 1.04, "learning_rate": 4.478120359856756e-05, "loss": 0.9705, "step": 11950 }, { "epoch": 1.04, "learning_rate": 4.47768364049262e-05, "loss": 0.9705, "step": 11960 }, { "epoch": 1.05, "learning_rate": 4.477246921128483e-05, "loss": 0.9774, "step": 11970 }, { "epoch": 1.05, "learning_rate": 4.476810201764347e-05, "loss": 0.9664, "step": 11980 }, { "epoch": 1.05, "learning_rate": 4.4763734824002094e-05, "loss": 0.8447, "step": 11990 }, { "epoch": 1.05, "learning_rate": 4.4759367630360734e-05, "loss": 0.9623, "step": 12000 }, { "epoch": 1.05, "learning_rate": 4.475500043671937e-05, "loss": 0.8843, "step": 12010 }, { "epoch": 1.05, "learning_rate": 4.4750633243078e-05, "loss": 0.9471, "step": 12020 }, { "epoch": 1.05, "learning_rate": 4.474626604943663e-05, "loss": 0.8741, "step": 12030 }, { "epoch": 1.05, "learning_rate": 4.4741898855795266e-05, "loss": 0.8642, "step": 12040 }, { "epoch": 1.05, "learning_rate": 4.4737531662153906e-05, "loss": 0.8563, "step": 12050 }, { "epoch": 1.05, "learning_rate": 4.473316446851253e-05, "loss": 0.9213, "step": 12060 }, { "epoch": 1.05, "learning_rate": 4.472879727487117e-05, "loss": 0.9986, "step": 12070 }, { "epoch": 1.06, "learning_rate": 4.47244300812298e-05, "loss": 0.91, "step": 12080 }, { "epoch": 1.06, "learning_rate": 4.472006288758844e-05, "loss": 0.9023, "step": 12090 }, { "epoch": 1.06, "learning_rate": 4.471569569394707e-05, "loss": 0.976, "step": 12100 }, { "epoch": 1.06, "learning_rate": 4.4711328500305706e-05, "loss": 0.8676, "step": 12110 }, { "epoch": 1.06, "learning_rate": 4.470696130666434e-05, "loss": 0.9281, "step": 12120 }, { "epoch": 1.06, "learning_rate": 4.470259411302297e-05, "loss": 0.9033, "step": 12130 }, { "epoch": 1.06, "learning_rate": 4.4698226919381605e-05, "loss": 0.8466, "step": 12140 }, { "epoch": 1.06, "learning_rate": 4.469385972574024e-05, "loss": 0.8466, "step": 12150 }, { "epoch": 1.06, "learning_rate": 4.468949253209888e-05, "loss": 0.9056, "step": 12160 }, { "epoch": 1.06, "learning_rate": 4.468512533845751e-05, "loss": 0.9986, "step": 12170 }, { "epoch": 1.06, "learning_rate": 4.4680758144816145e-05, "loss": 0.9672, "step": 12180 }, { "epoch": 1.06, "learning_rate": 4.467639095117478e-05, "loss": 1.0553, "step": 12190 }, { "epoch": 1.07, "learning_rate": 4.467202375753341e-05, "loss": 0.9605, "step": 12200 }, { "epoch": 1.07, "learning_rate": 4.4667656563892044e-05, "loss": 0.8074, "step": 12210 }, { "epoch": 1.07, "learning_rate": 4.466328937025068e-05, "loss": 0.9541, "step": 12220 }, { "epoch": 1.07, "learning_rate": 4.465892217660931e-05, "loss": 0.9729, "step": 12230 }, { "epoch": 1.07, "learning_rate": 4.4654554982967944e-05, "loss": 0.9304, "step": 12240 }, { "epoch": 1.07, "learning_rate": 4.4650187789326584e-05, "loss": 0.9546, "step": 12250 }, { "epoch": 1.07, "learning_rate": 4.464582059568522e-05, "loss": 0.9757, "step": 12260 }, { "epoch": 1.07, "learning_rate": 4.464145340204385e-05, "loss": 1.065, "step": 12270 }, { "epoch": 1.07, "learning_rate": 4.4637086208402484e-05, "loss": 1.0735, "step": 12280 }, { "epoch": 1.07, "learning_rate": 4.463271901476112e-05, "loss": 1.0065, "step": 12290 }, { "epoch": 1.07, "learning_rate": 4.462835182111975e-05, "loss": 0.8726, "step": 12300 }, { "epoch": 1.08, "learning_rate": 4.462398462747838e-05, "loss": 0.9266, "step": 12310 }, { "epoch": 1.08, "learning_rate": 4.4619617433837016e-05, "loss": 0.8507, "step": 12320 }, { "epoch": 1.08, "learning_rate": 4.4615250240195656e-05, "loss": 0.8866, "step": 12330 }, { "epoch": 1.08, "learning_rate": 4.461088304655428e-05, "loss": 0.9401, "step": 12340 }, { "epoch": 1.08, "learning_rate": 4.460651585291292e-05, "loss": 0.8221, "step": 12350 }, { "epoch": 1.08, "learning_rate": 4.4602148659271556e-05, "loss": 0.9033, "step": 12360 }, { "epoch": 1.08, "learning_rate": 4.459778146563019e-05, "loss": 0.9285, "step": 12370 }, { "epoch": 1.08, "learning_rate": 4.459341427198882e-05, "loss": 0.9238, "step": 12380 }, { "epoch": 1.08, "learning_rate": 4.4589047078347456e-05, "loss": 0.9655, "step": 12390 }, { "epoch": 1.08, "learning_rate": 4.458467988470609e-05, "loss": 0.9495, "step": 12400 }, { "epoch": 1.08, "learning_rate": 4.458031269106472e-05, "loss": 0.9983, "step": 12410 }, { "epoch": 1.08, "learning_rate": 4.457594549742336e-05, "loss": 0.9928, "step": 12420 }, { "epoch": 1.09, "learning_rate": 4.457157830378199e-05, "loss": 0.9189, "step": 12430 }, { "epoch": 1.09, "learning_rate": 4.456721111014063e-05, "loss": 0.9271, "step": 12440 }, { "epoch": 1.09, "learning_rate": 4.456284391649926e-05, "loss": 0.8451, "step": 12450 }, { "epoch": 1.09, "learning_rate": 4.4558476722857895e-05, "loss": 0.893, "step": 12460 }, { "epoch": 1.09, "learning_rate": 4.455410952921653e-05, "loss": 0.8496, "step": 12470 }, { "epoch": 1.09, "learning_rate": 4.454974233557516e-05, "loss": 0.8163, "step": 12480 }, { "epoch": 1.09, "learning_rate": 4.4545375141933794e-05, "loss": 0.9072, "step": 12490 }, { "epoch": 1.09, "learning_rate": 4.454100794829243e-05, "loss": 0.8713, "step": 12500 }, { "epoch": 1.09, "learning_rate": 4.453664075465107e-05, "loss": 0.982, "step": 12510 }, { "epoch": 1.09, "learning_rate": 4.4532273561009694e-05, "loss": 0.9044, "step": 12520 }, { "epoch": 1.09, "learning_rate": 4.4527906367368334e-05, "loss": 0.9728, "step": 12530 }, { "epoch": 1.1, "learning_rate": 4.452353917372696e-05, "loss": 0.936, "step": 12540 }, { "epoch": 1.1, "learning_rate": 4.45191719800856e-05, "loss": 0.851, "step": 12550 }, { "epoch": 1.1, "learning_rate": 4.4514804786444234e-05, "loss": 0.8592, "step": 12560 }, { "epoch": 1.1, "learning_rate": 4.451043759280287e-05, "loss": 0.9521, "step": 12570 }, { "epoch": 1.1, "learning_rate": 4.450607039916151e-05, "loss": 0.8226, "step": 12580 }, { "epoch": 1.1, "learning_rate": 4.450170320552013e-05, "loss": 0.8988, "step": 12590 }, { "epoch": 1.1, "learning_rate": 4.449733601187877e-05, "loss": 1.1016, "step": 12600 }, { "epoch": 1.1, "learning_rate": 4.44929688182374e-05, "loss": 0.8615, "step": 12610 }, { "epoch": 1.1, "learning_rate": 4.448860162459604e-05, "loss": 0.8843, "step": 12620 }, { "epoch": 1.1, "learning_rate": 4.4484234430954666e-05, "loss": 0.9107, "step": 12630 }, { "epoch": 1.1, "learning_rate": 4.4479867237313306e-05, "loss": 0.8771, "step": 12640 }, { "epoch": 1.1, "learning_rate": 4.447550004367194e-05, "loss": 0.8495, "step": 12650 }, { "epoch": 1.11, "learning_rate": 4.447113285003057e-05, "loss": 0.8912, "step": 12660 }, { "epoch": 1.11, "learning_rate": 4.4466765656389206e-05, "loss": 0.9469, "step": 12670 }, { "epoch": 1.11, "learning_rate": 4.446239846274784e-05, "loss": 0.9458, "step": 12680 }, { "epoch": 1.11, "learning_rate": 4.445803126910648e-05, "loss": 1.0344, "step": 12690 }, { "epoch": 1.11, "learning_rate": 4.4453664075465105e-05, "loss": 0.9375, "step": 12700 }, { "epoch": 1.11, "learning_rate": 4.4449296881823745e-05, "loss": 0.9414, "step": 12710 }, { "epoch": 1.11, "learning_rate": 4.444492968818237e-05, "loss": 0.9428, "step": 12720 }, { "epoch": 1.11, "learning_rate": 4.444056249454101e-05, "loss": 0.9601, "step": 12730 }, { "epoch": 1.11, "learning_rate": 4.4436195300899645e-05, "loss": 0.9152, "step": 12740 }, { "epoch": 1.11, "learning_rate": 4.443182810725828e-05, "loss": 0.8413, "step": 12750 }, { "epoch": 1.11, "learning_rate": 4.442746091361691e-05, "loss": 0.9359, "step": 12760 }, { "epoch": 1.12, "learning_rate": 4.4423093719975544e-05, "loss": 0.9442, "step": 12770 }, { "epoch": 1.12, "learning_rate": 4.4418726526334184e-05, "loss": 0.8774, "step": 12780 }, { "epoch": 1.12, "learning_rate": 4.441435933269281e-05, "loss": 0.9719, "step": 12790 }, { "epoch": 1.12, "learning_rate": 4.440999213905145e-05, "loss": 0.855, "step": 12800 }, { "epoch": 1.12, "learning_rate": 4.440562494541008e-05, "loss": 1.0001, "step": 12810 }, { "epoch": 1.12, "learning_rate": 4.440125775176872e-05, "loss": 0.9675, "step": 12820 }, { "epoch": 1.12, "learning_rate": 4.439689055812735e-05, "loss": 0.8379, "step": 12830 }, { "epoch": 1.12, "learning_rate": 4.4392523364485984e-05, "loss": 0.8493, "step": 12840 }, { "epoch": 1.12, "learning_rate": 4.438815617084462e-05, "loss": 0.9467, "step": 12850 }, { "epoch": 1.12, "learning_rate": 4.438378897720325e-05, "loss": 0.8218, "step": 12860 }, { "epoch": 1.12, "learning_rate": 4.437942178356188e-05, "loss": 1.018, "step": 12870 }, { "epoch": 1.12, "learning_rate": 4.4375054589920517e-05, "loss": 0.8891, "step": 12880 }, { "epoch": 1.13, "learning_rate": 4.4370687396279156e-05, "loss": 0.859, "step": 12890 }, { "epoch": 1.13, "learning_rate": 4.436632020263778e-05, "loss": 0.9007, "step": 12900 }, { "epoch": 1.13, "learning_rate": 4.436195300899642e-05, "loss": 0.9974, "step": 12910 }, { "epoch": 1.13, "learning_rate": 4.4357585815355056e-05, "loss": 0.7639, "step": 12920 }, { "epoch": 1.13, "learning_rate": 4.435321862171369e-05, "loss": 0.7397, "step": 12930 }, { "epoch": 1.13, "learning_rate": 4.434885142807232e-05, "loss": 0.9167, "step": 12940 }, { "epoch": 1.13, "learning_rate": 4.4344484234430956e-05, "loss": 0.9047, "step": 12950 }, { "epoch": 1.13, "learning_rate": 4.434011704078959e-05, "loss": 0.8814, "step": 12960 }, { "epoch": 1.13, "learning_rate": 4.433574984714822e-05, "loss": 0.8639, "step": 12970 }, { "epoch": 1.13, "learning_rate": 4.433138265350686e-05, "loss": 0.7749, "step": 12980 }, { "epoch": 1.13, "learning_rate": 4.4327015459865495e-05, "loss": 0.9552, "step": 12990 }, { "epoch": 1.14, "learning_rate": 4.432264826622413e-05, "loss": 0.985, "step": 13000 }, { "epoch": 1.14, "learning_rate": 4.431828107258276e-05, "loss": 1.0176, "step": 13010 }, { "epoch": 1.14, "learning_rate": 4.4313913878941395e-05, "loss": 0.9972, "step": 13020 }, { "epoch": 1.14, "learning_rate": 4.430954668530003e-05, "loss": 0.9697, "step": 13030 }, { "epoch": 1.14, "learning_rate": 4.430517949165866e-05, "loss": 0.9851, "step": 13040 }, { "epoch": 1.14, "learning_rate": 4.4300812298017295e-05, "loss": 0.8945, "step": 13050 }, { "epoch": 1.14, "learning_rate": 4.429644510437593e-05, "loss": 0.8953, "step": 13060 }, { "epoch": 1.14, "learning_rate": 4.429207791073456e-05, "loss": 0.8801, "step": 13070 }, { "epoch": 1.14, "learning_rate": 4.42877107170932e-05, "loss": 0.9492, "step": 13080 }, { "epoch": 1.14, "learning_rate": 4.4283343523451834e-05, "loss": 0.9237, "step": 13090 }, { "epoch": 1.14, "learning_rate": 4.427897632981047e-05, "loss": 0.8587, "step": 13100 }, { "epoch": 1.15, "learning_rate": 4.42746091361691e-05, "loss": 1.0227, "step": 13110 }, { "epoch": 1.15, "learning_rate": 4.4270241942527734e-05, "loss": 0.9383, "step": 13120 }, { "epoch": 1.15, "learning_rate": 4.426587474888637e-05, "loss": 0.902, "step": 13130 }, { "epoch": 1.15, "learning_rate": 4.4261507555245e-05, "loss": 0.8372, "step": 13140 }, { "epoch": 1.15, "learning_rate": 4.425714036160363e-05, "loss": 0.9497, "step": 13150 }, { "epoch": 1.15, "learning_rate": 4.4252773167962267e-05, "loss": 0.9441, "step": 13160 }, { "epoch": 1.15, "learning_rate": 4.4248405974320906e-05, "loss": 0.9118, "step": 13170 }, { "epoch": 1.15, "learning_rate": 4.424403878067954e-05, "loss": 0.8783, "step": 13180 }, { "epoch": 1.15, "learning_rate": 4.423967158703817e-05, "loss": 0.9718, "step": 13190 }, { "epoch": 1.15, "learning_rate": 4.4235304393396806e-05, "loss": 0.9306, "step": 13200 }, { "epoch": 1.15, "learning_rate": 4.423093719975544e-05, "loss": 0.9114, "step": 13210 }, { "epoch": 1.15, "learning_rate": 4.422657000611407e-05, "loss": 0.8427, "step": 13220 }, { "epoch": 1.16, "learning_rate": 4.4222202812472706e-05, "loss": 1.0131, "step": 13230 }, { "epoch": 1.16, "learning_rate": 4.4217835618831346e-05, "loss": 0.8891, "step": 13240 }, { "epoch": 1.16, "learning_rate": 4.421346842518997e-05, "loss": 0.8747, "step": 13250 }, { "epoch": 1.16, "learning_rate": 4.420910123154861e-05, "loss": 0.8912, "step": 13260 }, { "epoch": 1.16, "learning_rate": 4.420473403790724e-05, "loss": 0.9641, "step": 13270 }, { "epoch": 1.16, "learning_rate": 4.420036684426588e-05, "loss": 0.9093, "step": 13280 }, { "epoch": 1.16, "learning_rate": 4.419599965062451e-05, "loss": 0.9304, "step": 13290 }, { "epoch": 1.16, "learning_rate": 4.4191632456983145e-05, "loss": 0.8958, "step": 13300 }, { "epoch": 1.16, "learning_rate": 4.418726526334178e-05, "loss": 0.7827, "step": 13310 }, { "epoch": 1.16, "learning_rate": 4.418289806970041e-05, "loss": 0.9512, "step": 13320 }, { "epoch": 1.16, "learning_rate": 4.417853087605905e-05, "loss": 0.8458, "step": 13330 }, { "epoch": 1.17, "learning_rate": 4.417416368241768e-05, "loss": 0.9175, "step": 13340 }, { "epoch": 1.17, "learning_rate": 4.416979648877632e-05, "loss": 0.8291, "step": 13350 }, { "epoch": 1.17, "learning_rate": 4.4165429295134944e-05, "loss": 0.831, "step": 13360 }, { "epoch": 1.17, "learning_rate": 4.4161062101493584e-05, "loss": 0.8486, "step": 13370 }, { "epoch": 1.17, "learning_rate": 4.415669490785222e-05, "loss": 0.9529, "step": 13380 }, { "epoch": 1.17, "learning_rate": 4.415232771421085e-05, "loss": 0.8852, "step": 13390 }, { "epoch": 1.17, "learning_rate": 4.4147960520569484e-05, "loss": 0.9277, "step": 13400 }, { "epoch": 1.17, "learning_rate": 4.414359332692812e-05, "loss": 0.8877, "step": 13410 }, { "epoch": 1.17, "learning_rate": 4.413922613328676e-05, "loss": 0.9588, "step": 13420 }, { "epoch": 1.17, "learning_rate": 4.413485893964538e-05, "loss": 0.8317, "step": 13430 }, { "epoch": 1.17, "learning_rate": 4.413049174600402e-05, "loss": 0.8091, "step": 13440 }, { "epoch": 1.17, "learning_rate": 4.412612455236265e-05, "loss": 0.9759, "step": 13450 }, { "epoch": 1.18, "learning_rate": 4.412175735872129e-05, "loss": 0.8775, "step": 13460 }, { "epoch": 1.18, "learning_rate": 4.4117390165079916e-05, "loss": 1.0209, "step": 13470 }, { "epoch": 1.18, "learning_rate": 4.4113022971438556e-05, "loss": 0.8956, "step": 13480 }, { "epoch": 1.18, "learning_rate": 4.410865577779719e-05, "loss": 0.8969, "step": 13490 }, { "epoch": 1.18, "learning_rate": 4.410428858415582e-05, "loss": 0.9865, "step": 13500 }, { "epoch": 1.18, "learning_rate": 4.409992139051446e-05, "loss": 0.7589, "step": 13510 }, { "epoch": 1.18, "learning_rate": 4.409555419687309e-05, "loss": 0.9255, "step": 13520 }, { "epoch": 1.18, "learning_rate": 4.409118700323173e-05, "loss": 1.09, "step": 13530 }, { "epoch": 1.18, "learning_rate": 4.4086819809590355e-05, "loss": 1.0017, "step": 13540 }, { "epoch": 1.18, "learning_rate": 4.4082452615948995e-05, "loss": 1.0634, "step": 13550 }, { "epoch": 1.18, "learning_rate": 4.407808542230762e-05, "loss": 0.8959, "step": 13560 }, { "epoch": 1.19, "learning_rate": 4.407371822866626e-05, "loss": 0.8688, "step": 13570 }, { "epoch": 1.19, "learning_rate": 4.4069351035024895e-05, "loss": 0.9134, "step": 13580 }, { "epoch": 1.19, "learning_rate": 4.406498384138353e-05, "loss": 0.9179, "step": 13590 }, { "epoch": 1.19, "learning_rate": 4.406061664774216e-05, "loss": 0.9349, "step": 13600 }, { "epoch": 1.19, "learning_rate": 4.4056249454100795e-05, "loss": 0.962, "step": 13610 }, { "epoch": 1.19, "learning_rate": 4.4051882260459435e-05, "loss": 0.8952, "step": 13620 }, { "epoch": 1.19, "learning_rate": 4.404751506681806e-05, "loss": 0.9066, "step": 13630 }, { "epoch": 1.19, "learning_rate": 4.40431478731767e-05, "loss": 0.9379, "step": 13640 }, { "epoch": 1.19, "learning_rate": 4.4038780679535334e-05, "loss": 0.9543, "step": 13650 }, { "epoch": 1.19, "learning_rate": 4.403441348589397e-05, "loss": 0.8335, "step": 13660 }, { "epoch": 1.19, "learning_rate": 4.40300462922526e-05, "loss": 0.8901, "step": 13670 }, { "epoch": 1.19, "learning_rate": 4.4025679098611234e-05, "loss": 1.0229, "step": 13680 }, { "epoch": 1.2, "learning_rate": 4.402131190496987e-05, "loss": 0.9643, "step": 13690 }, { "epoch": 1.2, "learning_rate": 4.40169447113285e-05, "loss": 0.8058, "step": 13700 }, { "epoch": 1.2, "learning_rate": 4.401257751768714e-05, "loss": 1.1178, "step": 13710 }, { "epoch": 1.2, "learning_rate": 4.4008210324045767e-05, "loss": 0.934, "step": 13720 }, { "epoch": 1.2, "learning_rate": 4.4003843130404407e-05, "loss": 0.8609, "step": 13730 }, { "epoch": 1.2, "learning_rate": 4.399947593676304e-05, "loss": 0.9171, "step": 13740 }, { "epoch": 1.2, "learning_rate": 4.399510874312167e-05, "loss": 0.9381, "step": 13750 }, { "epoch": 1.2, "learning_rate": 4.3990741549480306e-05, "loss": 0.9924, "step": 13760 }, { "epoch": 1.2, "learning_rate": 4.398637435583894e-05, "loss": 0.9531, "step": 13770 }, { "epoch": 1.2, "learning_rate": 4.398200716219757e-05, "loss": 0.8536, "step": 13780 }, { "epoch": 1.2, "learning_rate": 4.3977639968556206e-05, "loss": 0.7362, "step": 13790 }, { "epoch": 1.21, "learning_rate": 4.397327277491484e-05, "loss": 0.8746, "step": 13800 }, { "epoch": 1.21, "learning_rate": 4.396890558127348e-05, "loss": 0.8384, "step": 13810 }, { "epoch": 1.21, "learning_rate": 4.396453838763211e-05, "loss": 1.0544, "step": 13820 }, { "epoch": 1.21, "learning_rate": 4.3960171193990745e-05, "loss": 0.7399, "step": 13830 }, { "epoch": 1.21, "learning_rate": 4.395580400034938e-05, "loss": 0.9517, "step": 13840 }, { "epoch": 1.21, "learning_rate": 4.395143680670801e-05, "loss": 0.7535, "step": 13850 }, { "epoch": 1.21, "learning_rate": 4.3947069613066645e-05, "loss": 0.9315, "step": 13860 }, { "epoch": 1.21, "learning_rate": 4.394270241942528e-05, "loss": 0.8628, "step": 13870 }, { "epoch": 1.21, "learning_rate": 4.393833522578391e-05, "loss": 0.9181, "step": 13880 }, { "epoch": 1.21, "learning_rate": 4.3933968032142545e-05, "loss": 0.9031, "step": 13890 }, { "epoch": 1.21, "learning_rate": 4.3929600838501185e-05, "loss": 0.9423, "step": 13900 }, { "epoch": 1.21, "learning_rate": 4.392523364485982e-05, "loss": 0.8268, "step": 13910 }, { "epoch": 1.22, "learning_rate": 4.392086645121845e-05, "loss": 0.9842, "step": 13920 }, { "epoch": 1.22, "learning_rate": 4.3916499257577084e-05, "loss": 0.8407, "step": 13930 }, { "epoch": 1.22, "learning_rate": 4.391213206393572e-05, "loss": 0.9953, "step": 13940 }, { "epoch": 1.22, "learning_rate": 4.390776487029435e-05, "loss": 0.9934, "step": 13950 }, { "epoch": 1.22, "learning_rate": 4.3903397676652984e-05, "loss": 0.9091, "step": 13960 }, { "epoch": 1.22, "learning_rate": 4.389903048301162e-05, "loss": 0.9382, "step": 13970 }, { "epoch": 1.22, "learning_rate": 4.389466328937025e-05, "loss": 0.9491, "step": 13980 }, { "epoch": 1.22, "learning_rate": 4.389029609572889e-05, "loss": 0.9556, "step": 13990 }, { "epoch": 1.22, "learning_rate": 4.3885928902087517e-05, "loss": 0.9202, "step": 14000 }, { "epoch": 1.22, "learning_rate": 4.3881561708446157e-05, "loss": 0.8045, "step": 14010 }, { "epoch": 1.22, "learning_rate": 4.387719451480479e-05, "loss": 0.9044, "step": 14020 }, { "epoch": 1.23, "learning_rate": 4.387282732116342e-05, "loss": 0.9394, "step": 14030 }, { "epoch": 1.23, "learning_rate": 4.3868460127522056e-05, "loss": 0.885, "step": 14040 }, { "epoch": 1.23, "learning_rate": 4.386409293388069e-05, "loss": 0.9719, "step": 14050 }, { "epoch": 1.23, "learning_rate": 4.385972574023933e-05, "loss": 0.9355, "step": 14060 }, { "epoch": 1.23, "learning_rate": 4.3855358546597956e-05, "loss": 0.9881, "step": 14070 }, { "epoch": 1.23, "learning_rate": 4.3850991352956596e-05, "loss": 0.9022, "step": 14080 }, { "epoch": 1.23, "learning_rate": 4.384662415931522e-05, "loss": 0.9913, "step": 14090 }, { "epoch": 1.23, "learning_rate": 4.384225696567386e-05, "loss": 0.7881, "step": 14100 }, { "epoch": 1.23, "learning_rate": 4.3837889772032495e-05, "loss": 0.9161, "step": 14110 }, { "epoch": 1.23, "learning_rate": 4.383352257839113e-05, "loss": 0.8111, "step": 14120 }, { "epoch": 1.23, "learning_rate": 4.382915538474976e-05, "loss": 0.8397, "step": 14130 }, { "epoch": 1.24, "learning_rate": 4.3824788191108395e-05, "loss": 0.9204, "step": 14140 }, { "epoch": 1.24, "learning_rate": 4.3820420997467035e-05, "loss": 0.8167, "step": 14150 }, { "epoch": 1.24, "learning_rate": 4.381605380382566e-05, "loss": 0.982, "step": 14160 }, { "epoch": 1.24, "learning_rate": 4.38116866101843e-05, "loss": 0.9961, "step": 14170 }, { "epoch": 1.24, "learning_rate": 4.380731941654293e-05, "loss": 0.9445, "step": 14180 }, { "epoch": 1.24, "learning_rate": 4.380295222290157e-05, "loss": 0.936, "step": 14190 }, { "epoch": 1.24, "learning_rate": 4.3798585029260194e-05, "loss": 0.89, "step": 14200 }, { "epoch": 1.24, "learning_rate": 4.3794217835618834e-05, "loss": 0.9726, "step": 14210 }, { "epoch": 1.24, "learning_rate": 4.378985064197747e-05, "loss": 1.0089, "step": 14220 }, { "epoch": 1.24, "learning_rate": 4.37854834483361e-05, "loss": 0.9562, "step": 14230 }, { "epoch": 1.24, "learning_rate": 4.378111625469474e-05, "loss": 0.9268, "step": 14240 }, { "epoch": 1.24, "learning_rate": 4.377674906105337e-05, "loss": 0.9123, "step": 14250 }, { "epoch": 1.25, "learning_rate": 4.377238186741201e-05, "loss": 0.8134, "step": 14260 }, { "epoch": 1.25, "learning_rate": 4.376801467377063e-05, "loss": 0.8873, "step": 14270 }, { "epoch": 1.25, "learning_rate": 4.376364748012927e-05, "loss": 0.9168, "step": 14280 }, { "epoch": 1.25, "learning_rate": 4.37592802864879e-05, "loss": 0.8936, "step": 14290 }, { "epoch": 1.25, "learning_rate": 4.375491309284654e-05, "loss": 0.8572, "step": 14300 }, { "epoch": 1.25, "learning_rate": 4.375054589920517e-05, "loss": 0.7955, "step": 14310 }, { "epoch": 1.25, "learning_rate": 4.3746178705563806e-05, "loss": 0.9254, "step": 14320 }, { "epoch": 1.25, "learning_rate": 4.374181151192244e-05, "loss": 0.9034, "step": 14330 }, { "epoch": 1.25, "learning_rate": 4.373744431828107e-05, "loss": 0.9295, "step": 14340 }, { "epoch": 1.25, "learning_rate": 4.373307712463971e-05, "loss": 0.8735, "step": 14350 }, { "epoch": 1.25, "learning_rate": 4.372870993099834e-05, "loss": 1.0061, "step": 14360 }, { "epoch": 1.26, "learning_rate": 4.372434273735698e-05, "loss": 0.8904, "step": 14370 }, { "epoch": 1.26, "learning_rate": 4.3719975543715605e-05, "loss": 0.9275, "step": 14380 }, { "epoch": 1.26, "learning_rate": 4.3715608350074245e-05, "loss": 0.8618, "step": 14390 }, { "epoch": 1.26, "learning_rate": 4.371124115643288e-05, "loss": 0.8542, "step": 14400 }, { "epoch": 1.26, "learning_rate": 4.370687396279151e-05, "loss": 0.8597, "step": 14410 }, { "epoch": 1.26, "learning_rate": 4.3702506769150145e-05, "loss": 0.8854, "step": 14420 }, { "epoch": 1.26, "learning_rate": 4.369813957550878e-05, "loss": 0.8318, "step": 14430 }, { "epoch": 1.26, "learning_rate": 4.369377238186742e-05, "loss": 0.9554, "step": 14440 }, { "epoch": 1.26, "learning_rate": 4.3689405188226045e-05, "loss": 0.8539, "step": 14450 }, { "epoch": 1.26, "learning_rate": 4.3685037994584685e-05, "loss": 0.9091, "step": 14460 }, { "epoch": 1.26, "learning_rate": 4.368067080094332e-05, "loss": 0.8744, "step": 14470 }, { "epoch": 1.26, "learning_rate": 4.367630360730195e-05, "loss": 0.8784, "step": 14480 }, { "epoch": 1.27, "learning_rate": 4.3671936413660584e-05, "loss": 0.9389, "step": 14490 }, { "epoch": 1.27, "learning_rate": 4.366756922001922e-05, "loss": 0.9308, "step": 14500 }, { "epoch": 1.27, "learning_rate": 4.366320202637785e-05, "loss": 0.7971, "step": 14510 }, { "epoch": 1.27, "learning_rate": 4.3658834832736484e-05, "loss": 0.9154, "step": 14520 }, { "epoch": 1.27, "learning_rate": 4.365446763909512e-05, "loss": 0.9362, "step": 14530 }, { "epoch": 1.27, "learning_rate": 4.365010044545375e-05, "loss": 0.9265, "step": 14540 }, { "epoch": 1.27, "learning_rate": 4.364573325181239e-05, "loss": 0.9781, "step": 14550 }, { "epoch": 1.27, "learning_rate": 4.364136605817102e-05, "loss": 0.8626, "step": 14560 }, { "epoch": 1.27, "learning_rate": 4.3636998864529657e-05, "loss": 0.9103, "step": 14570 }, { "epoch": 1.27, "learning_rate": 4.363263167088829e-05, "loss": 0.9261, "step": 14580 }, { "epoch": 1.27, "learning_rate": 4.362826447724692e-05, "loss": 0.9449, "step": 14590 }, { "epoch": 1.28, "learning_rate": 4.3623897283605556e-05, "loss": 0.9019, "step": 14600 }, { "epoch": 1.28, "learning_rate": 4.361953008996419e-05, "loss": 0.9538, "step": 14610 }, { "epoch": 1.28, "learning_rate": 4.361516289632282e-05, "loss": 0.9009, "step": 14620 }, { "epoch": 1.28, "learning_rate": 4.361079570268146e-05, "loss": 0.8667, "step": 14630 }, { "epoch": 1.28, "learning_rate": 4.3606428509040096e-05, "loss": 0.86, "step": 14640 }, { "epoch": 1.28, "learning_rate": 4.360206131539873e-05, "loss": 0.8449, "step": 14650 }, { "epoch": 1.28, "learning_rate": 4.359769412175736e-05, "loss": 0.9632, "step": 14660 }, { "epoch": 1.28, "learning_rate": 4.3593326928115995e-05, "loss": 0.8477, "step": 14670 }, { "epoch": 1.28, "learning_rate": 4.358895973447463e-05, "loss": 0.8059, "step": 14680 }, { "epoch": 1.28, "learning_rate": 4.358459254083326e-05, "loss": 0.9049, "step": 14690 }, { "epoch": 1.28, "learning_rate": 4.3580225347191895e-05, "loss": 0.9158, "step": 14700 }, { "epoch": 1.28, "learning_rate": 4.357585815355053e-05, "loss": 0.9335, "step": 14710 }, { "epoch": 1.29, "learning_rate": 4.357149095990917e-05, "loss": 0.8919, "step": 14720 }, { "epoch": 1.29, "learning_rate": 4.3567123766267795e-05, "loss": 0.9624, "step": 14730 }, { "epoch": 1.29, "learning_rate": 4.3562756572626435e-05, "loss": 0.9487, "step": 14740 }, { "epoch": 1.29, "learning_rate": 4.355838937898507e-05, "loss": 0.7973, "step": 14750 }, { "epoch": 1.29, "learning_rate": 4.35540221853437e-05, "loss": 0.8796, "step": 14760 }, { "epoch": 1.29, "learning_rate": 4.3549654991702334e-05, "loss": 0.9861, "step": 14770 }, { "epoch": 1.29, "learning_rate": 4.354528779806097e-05, "loss": 0.8296, "step": 14780 }, { "epoch": 1.29, "learning_rate": 4.35409206044196e-05, "loss": 0.8536, "step": 14790 }, { "epoch": 1.29, "learning_rate": 4.3536553410778234e-05, "loss": 0.935, "step": 14800 }, { "epoch": 1.29, "learning_rate": 4.3532186217136874e-05, "loss": 0.8927, "step": 14810 }, { "epoch": 1.29, "learning_rate": 4.35278190234955e-05, "loss": 1.0031, "step": 14820 }, { "epoch": 1.3, "learning_rate": 4.352345182985414e-05, "loss": 0.9235, "step": 14830 }, { "epoch": 1.3, "learning_rate": 4.351908463621277e-05, "loss": 0.9993, "step": 14840 }, { "epoch": 1.3, "learning_rate": 4.3514717442571407e-05, "loss": 0.817, "step": 14850 }, { "epoch": 1.3, "learning_rate": 4.351035024893004e-05, "loss": 0.9763, "step": 14860 }, { "epoch": 1.3, "learning_rate": 4.350598305528867e-05, "loss": 0.8549, "step": 14870 }, { "epoch": 1.3, "learning_rate": 4.350161586164731e-05, "loss": 0.6822, "step": 14880 }, { "epoch": 1.3, "learning_rate": 4.349724866800594e-05, "loss": 0.9731, "step": 14890 }, { "epoch": 1.3, "learning_rate": 4.349288147436458e-05, "loss": 0.8859, "step": 14900 }, { "epoch": 1.3, "learning_rate": 4.3488514280723206e-05, "loss": 0.999, "step": 14910 }, { "epoch": 1.3, "learning_rate": 4.3484147087081846e-05, "loss": 0.8618, "step": 14920 }, { "epoch": 1.3, "learning_rate": 4.347977989344047e-05, "loss": 0.8044, "step": 14930 }, { "epoch": 1.3, "learning_rate": 4.347541269979911e-05, "loss": 0.9209, "step": 14940 }, { "epoch": 1.31, "learning_rate": 4.3471045506157745e-05, "loss": 0.8291, "step": 14950 }, { "epoch": 1.31, "learning_rate": 4.346667831251638e-05, "loss": 0.8276, "step": 14960 }, { "epoch": 1.31, "learning_rate": 4.346231111887502e-05, "loss": 1.0486, "step": 14970 }, { "epoch": 1.31, "learning_rate": 4.3457943925233645e-05, "loss": 1.0739, "step": 14980 }, { "epoch": 1.31, "learning_rate": 4.3453576731592285e-05, "loss": 0.9206, "step": 14990 }, { "epoch": 1.31, "learning_rate": 4.344920953795091e-05, "loss": 0.9701, "step": 15000 }, { "epoch": 1.31, "eval_accuracy": 0.572762110641643, "eval_loss": 0.9019526243209839, "eval_runtime": 84.1256, "eval_samples_per_second": 120.974, "eval_steps_per_second": 15.132, "step": 15000 }, { "epoch": 1.31, "learning_rate": 4.344484234430955e-05, "loss": 0.919, "step": 15010 }, { "epoch": 1.31, "learning_rate": 4.344047515066818e-05, "loss": 0.9125, "step": 15020 }, { "epoch": 1.31, "learning_rate": 4.343610795702682e-05, "loss": 0.8528, "step": 15030 }, { "epoch": 1.31, "learning_rate": 4.343174076338545e-05, "loss": 0.8604, "step": 15040 }, { "epoch": 1.31, "learning_rate": 4.3427373569744084e-05, "loss": 0.7717, "step": 15050 }, { "epoch": 1.32, "learning_rate": 4.342300637610272e-05, "loss": 0.9932, "step": 15060 }, { "epoch": 1.32, "learning_rate": 4.341863918246135e-05, "loss": 0.936, "step": 15070 }, { "epoch": 1.32, "learning_rate": 4.341427198881999e-05, "loss": 0.8852, "step": 15080 }, { "epoch": 1.32, "learning_rate": 4.340990479517862e-05, "loss": 0.8235, "step": 15090 }, { "epoch": 1.32, "learning_rate": 4.340553760153726e-05, "loss": 0.941, "step": 15100 }, { "epoch": 1.32, "learning_rate": 4.3401170407895883e-05, "loss": 0.9002, "step": 15110 }, { "epoch": 1.32, "learning_rate": 4.339680321425452e-05, "loss": 0.8642, "step": 15120 }, { "epoch": 1.32, "learning_rate": 4.3392436020613157e-05, "loss": 0.9763, "step": 15130 }, { "epoch": 1.32, "learning_rate": 4.338806882697179e-05, "loss": 0.9184, "step": 15140 }, { "epoch": 1.32, "learning_rate": 4.338370163333042e-05, "loss": 0.9508, "step": 15150 }, { "epoch": 1.32, "learning_rate": 4.3379334439689056e-05, "loss": 0.8666, "step": 15160 }, { "epoch": 1.33, "learning_rate": 4.3374967246047696e-05, "loss": 0.7512, "step": 15170 }, { "epoch": 1.33, "learning_rate": 4.337060005240632e-05, "loss": 0.8854, "step": 15180 }, { "epoch": 1.33, "learning_rate": 4.336623285876496e-05, "loss": 0.8542, "step": 15190 }, { "epoch": 1.33, "learning_rate": 4.336186566512359e-05, "loss": 0.9862, "step": 15200 }, { "epoch": 1.33, "learning_rate": 4.335749847148223e-05, "loss": 0.9491, "step": 15210 }, { "epoch": 1.33, "learning_rate": 4.335313127784086e-05, "loss": 0.8883, "step": 15220 }, { "epoch": 1.33, "learning_rate": 4.3348764084199495e-05, "loss": 0.9286, "step": 15230 }, { "epoch": 1.33, "learning_rate": 4.334439689055813e-05, "loss": 0.8865, "step": 15240 }, { "epoch": 1.33, "learning_rate": 4.334002969691676e-05, "loss": 0.9284, "step": 15250 }, { "epoch": 1.33, "learning_rate": 4.3335662503275395e-05, "loss": 0.9085, "step": 15260 }, { "epoch": 1.33, "learning_rate": 4.333129530963403e-05, "loss": 0.8477, "step": 15270 }, { "epoch": 1.33, "learning_rate": 4.332692811599267e-05, "loss": 0.9808, "step": 15280 }, { "epoch": 1.34, "learning_rate": 4.33225609223513e-05, "loss": 1.0297, "step": 15290 }, { "epoch": 1.34, "learning_rate": 4.3318193728709935e-05, "loss": 0.8064, "step": 15300 }, { "epoch": 1.34, "learning_rate": 4.331382653506857e-05, "loss": 0.7985, "step": 15310 }, { "epoch": 1.34, "learning_rate": 4.33094593414272e-05, "loss": 0.9677, "step": 15320 }, { "epoch": 1.34, "learning_rate": 4.3305092147785834e-05, "loss": 0.7872, "step": 15330 }, { "epoch": 1.34, "learning_rate": 4.330072495414447e-05, "loss": 0.7886, "step": 15340 }, { "epoch": 1.34, "learning_rate": 4.32963577605031e-05, "loss": 0.8831, "step": 15350 }, { "epoch": 1.34, "learning_rate": 4.3291990566861734e-05, "loss": 0.9471, "step": 15360 }, { "epoch": 1.34, "learning_rate": 4.3287623373220374e-05, "loss": 0.8748, "step": 15370 }, { "epoch": 1.34, "learning_rate": 4.328325617957901e-05, "loss": 0.8131, "step": 15380 }, { "epoch": 1.34, "learning_rate": 4.327888898593764e-05, "loss": 0.8532, "step": 15390 }, { "epoch": 1.35, "learning_rate": 4.327452179229627e-05, "loss": 0.9196, "step": 15400 }, { "epoch": 1.35, "learning_rate": 4.3270154598654907e-05, "loss": 1.0177, "step": 15410 }, { "epoch": 1.35, "learning_rate": 4.326578740501354e-05, "loss": 0.9363, "step": 15420 }, { "epoch": 1.35, "learning_rate": 4.326142021137217e-05, "loss": 0.8901, "step": 15430 }, { "epoch": 1.35, "learning_rate": 4.3257053017730806e-05, "loss": 0.9239, "step": 15440 }, { "epoch": 1.35, "learning_rate": 4.325268582408944e-05, "loss": 0.8278, "step": 15450 }, { "epoch": 1.35, "learning_rate": 4.324831863044807e-05, "loss": 0.9067, "step": 15460 }, { "epoch": 1.35, "learning_rate": 4.324395143680671e-05, "loss": 0.934, "step": 15470 }, { "epoch": 1.35, "learning_rate": 4.3239584243165346e-05, "loss": 0.8488, "step": 15480 }, { "epoch": 1.35, "learning_rate": 4.323521704952398e-05, "loss": 0.9234, "step": 15490 }, { "epoch": 1.35, "learning_rate": 4.323084985588261e-05, "loss": 1.0279, "step": 15500 }, { "epoch": 1.35, "learning_rate": 4.3226482662241245e-05, "loss": 0.8982, "step": 15510 }, { "epoch": 1.36, "learning_rate": 4.322211546859988e-05, "loss": 0.9552, "step": 15520 }, { "epoch": 1.36, "learning_rate": 4.321774827495851e-05, "loss": 0.9862, "step": 15530 }, { "epoch": 1.36, "learning_rate": 4.321338108131715e-05, "loss": 0.9822, "step": 15540 }, { "epoch": 1.36, "learning_rate": 4.320901388767578e-05, "loss": 0.8063, "step": 15550 }, { "epoch": 1.36, "learning_rate": 4.320464669403442e-05, "loss": 1.012, "step": 15560 }, { "epoch": 1.36, "learning_rate": 4.320027950039305e-05, "loss": 0.8776, "step": 15570 }, { "epoch": 1.36, "learning_rate": 4.3195912306751685e-05, "loss": 0.9512, "step": 15580 }, { "epoch": 1.36, "learning_rate": 4.319154511311032e-05, "loss": 0.9187, "step": 15590 }, { "epoch": 1.36, "learning_rate": 4.318717791946895e-05, "loss": 0.8636, "step": 15600 }, { "epoch": 1.36, "learning_rate": 4.3182810725827584e-05, "loss": 0.8431, "step": 15610 }, { "epoch": 1.36, "learning_rate": 4.317844353218622e-05, "loss": 0.8475, "step": 15620 }, { "epoch": 1.37, "learning_rate": 4.317407633854486e-05, "loss": 1.0569, "step": 15630 }, { "epoch": 1.37, "learning_rate": 4.3169709144903484e-05, "loss": 0.9097, "step": 15640 }, { "epoch": 1.37, "learning_rate": 4.3165341951262124e-05, "loss": 0.9391, "step": 15650 }, { "epoch": 1.37, "learning_rate": 4.316097475762075e-05, "loss": 1.0363, "step": 15660 }, { "epoch": 1.37, "learning_rate": 4.315660756397939e-05, "loss": 0.8641, "step": 15670 }, { "epoch": 1.37, "learning_rate": 4.3152240370338023e-05, "loss": 0.8073, "step": 15680 }, { "epoch": 1.37, "learning_rate": 4.3147873176696657e-05, "loss": 0.8456, "step": 15690 }, { "epoch": 1.37, "learning_rate": 4.3143505983055297e-05, "loss": 0.9166, "step": 15700 }, { "epoch": 1.37, "learning_rate": 4.313913878941392e-05, "loss": 0.9334, "step": 15710 }, { "epoch": 1.37, "learning_rate": 4.313477159577256e-05, "loss": 0.974, "step": 15720 }, { "epoch": 1.37, "learning_rate": 4.313040440213119e-05, "loss": 0.887, "step": 15730 }, { "epoch": 1.37, "learning_rate": 4.312603720848983e-05, "loss": 0.8785, "step": 15740 }, { "epoch": 1.38, "learning_rate": 4.3121670014848456e-05, "loss": 0.959, "step": 15750 }, { "epoch": 1.38, "learning_rate": 4.3117302821207096e-05, "loss": 0.9278, "step": 15760 }, { "epoch": 1.38, "learning_rate": 4.311293562756573e-05, "loss": 0.9141, "step": 15770 }, { "epoch": 1.38, "learning_rate": 4.310856843392436e-05, "loss": 0.9412, "step": 15780 }, { "epoch": 1.38, "learning_rate": 4.3104201240282995e-05, "loss": 0.9377, "step": 15790 }, { "epoch": 1.38, "learning_rate": 4.309983404664163e-05, "loss": 0.9439, "step": 15800 }, { "epoch": 1.38, "learning_rate": 4.309546685300027e-05, "loss": 0.9453, "step": 15810 }, { "epoch": 1.38, "learning_rate": 4.3091099659358895e-05, "loss": 0.9007, "step": 15820 }, { "epoch": 1.38, "learning_rate": 4.3086732465717535e-05, "loss": 0.8418, "step": 15830 }, { "epoch": 1.38, "learning_rate": 4.308236527207616e-05, "loss": 0.8998, "step": 15840 }, { "epoch": 1.38, "learning_rate": 4.30779980784348e-05, "loss": 0.8341, "step": 15850 }, { "epoch": 1.39, "learning_rate": 4.307363088479343e-05, "loss": 0.8769, "step": 15860 }, { "epoch": 1.39, "learning_rate": 4.306926369115207e-05, "loss": 0.8236, "step": 15870 }, { "epoch": 1.39, "learning_rate": 4.30648964975107e-05, "loss": 0.8204, "step": 15880 }, { "epoch": 1.39, "learning_rate": 4.3060529303869334e-05, "loss": 0.8364, "step": 15890 }, { "epoch": 1.39, "learning_rate": 4.3056162110227974e-05, "loss": 0.9732, "step": 15900 }, { "epoch": 1.39, "learning_rate": 4.30517949165866e-05, "loss": 0.8895, "step": 15910 }, { "epoch": 1.39, "learning_rate": 4.304742772294524e-05, "loss": 0.9245, "step": 15920 }, { "epoch": 1.39, "learning_rate": 4.304306052930387e-05, "loss": 0.9548, "step": 15930 }, { "epoch": 1.39, "learning_rate": 4.303869333566251e-05, "loss": 0.9749, "step": 15940 }, { "epoch": 1.39, "learning_rate": 4.303432614202114e-05, "loss": 0.9181, "step": 15950 }, { "epoch": 1.39, "learning_rate": 4.3029958948379773e-05, "loss": 0.8772, "step": 15960 }, { "epoch": 1.39, "learning_rate": 4.302559175473841e-05, "loss": 0.8677, "step": 15970 }, { "epoch": 1.4, "learning_rate": 4.302122456109704e-05, "loss": 0.8469, "step": 15980 }, { "epoch": 1.4, "learning_rate": 4.301685736745567e-05, "loss": 0.9965, "step": 15990 }, { "epoch": 1.4, "learning_rate": 4.3012490173814306e-05, "loss": 0.8987, "step": 16000 }, { "epoch": 1.4, "learning_rate": 4.3008122980172946e-05, "loss": 1.0017, "step": 16010 }, { "epoch": 1.4, "learning_rate": 4.300375578653157e-05, "loss": 0.8191, "step": 16020 }, { "epoch": 1.4, "learning_rate": 4.299938859289021e-05, "loss": 0.9799, "step": 16030 }, { "epoch": 1.4, "learning_rate": 4.2995021399248846e-05, "loss": 0.8877, "step": 16040 }, { "epoch": 1.4, "learning_rate": 4.299065420560748e-05, "loss": 1.0045, "step": 16050 }, { "epoch": 1.4, "learning_rate": 4.298628701196611e-05, "loss": 0.9374, "step": 16060 }, { "epoch": 1.4, "learning_rate": 4.2981919818324745e-05, "loss": 0.8568, "step": 16070 }, { "epoch": 1.4, "learning_rate": 4.297755262468338e-05, "loss": 0.9129, "step": 16080 }, { "epoch": 1.41, "learning_rate": 4.297318543104201e-05, "loss": 0.8271, "step": 16090 }, { "epoch": 1.41, "learning_rate": 4.296881823740065e-05, "loss": 1.0483, "step": 16100 }, { "epoch": 1.41, "learning_rate": 4.2964451043759285e-05, "loss": 0.8711, "step": 16110 }, { "epoch": 1.41, "learning_rate": 4.296008385011792e-05, "loss": 0.8456, "step": 16120 }, { "epoch": 1.41, "learning_rate": 4.295571665647655e-05, "loss": 0.9166, "step": 16130 }, { "epoch": 1.41, "learning_rate": 4.2951349462835185e-05, "loss": 0.8962, "step": 16140 }, { "epoch": 1.41, "learning_rate": 4.294698226919382e-05, "loss": 0.8397, "step": 16150 }, { "epoch": 1.41, "learning_rate": 4.294261507555245e-05, "loss": 0.8275, "step": 16160 }, { "epoch": 1.41, "learning_rate": 4.2938247881911084e-05, "loss": 0.788, "step": 16170 }, { "epoch": 1.41, "learning_rate": 4.293388068826972e-05, "loss": 0.8464, "step": 16180 }, { "epoch": 1.41, "learning_rate": 4.292951349462835e-05, "loss": 0.889, "step": 16190 }, { "epoch": 1.41, "learning_rate": 4.292514630098699e-05, "loss": 0.9127, "step": 16200 }, { "epoch": 1.42, "learning_rate": 4.2920779107345624e-05, "loss": 0.8973, "step": 16210 }, { "epoch": 1.42, "learning_rate": 4.291641191370426e-05, "loss": 0.9193, "step": 16220 }, { "epoch": 1.42, "learning_rate": 4.291204472006289e-05, "loss": 0.8628, "step": 16230 }, { "epoch": 1.42, "learning_rate": 4.2907677526421523e-05, "loss": 0.9211, "step": 16240 }, { "epoch": 1.42, "learning_rate": 4.290331033278016e-05, "loss": 0.8169, "step": 16250 }, { "epoch": 1.42, "learning_rate": 4.289894313913879e-05, "loss": 0.875, "step": 16260 }, { "epoch": 1.42, "learning_rate": 4.289457594549742e-05, "loss": 0.8632, "step": 16270 }, { "epoch": 1.42, "learning_rate": 4.2890208751856056e-05, "loss": 0.9422, "step": 16280 }, { "epoch": 1.42, "learning_rate": 4.2885841558214696e-05, "loss": 0.8801, "step": 16290 }, { "epoch": 1.42, "learning_rate": 4.288147436457333e-05, "loss": 0.9036, "step": 16300 }, { "epoch": 1.42, "learning_rate": 4.287710717093196e-05, "loss": 0.9846, "step": 16310 }, { "epoch": 1.43, "learning_rate": 4.2872739977290596e-05, "loss": 0.8923, "step": 16320 }, { "epoch": 1.43, "learning_rate": 4.286837278364923e-05, "loss": 0.9101, "step": 16330 }, { "epoch": 1.43, "learning_rate": 4.286400559000786e-05, "loss": 0.9685, "step": 16340 }, { "epoch": 1.43, "learning_rate": 4.2859638396366495e-05, "loss": 0.9544, "step": 16350 }, { "epoch": 1.43, "learning_rate": 4.2855271202725135e-05, "loss": 0.9659, "step": 16360 }, { "epoch": 1.43, "learning_rate": 4.285090400908376e-05, "loss": 0.9084, "step": 16370 }, { "epoch": 1.43, "learning_rate": 4.28465368154424e-05, "loss": 0.8866, "step": 16380 }, { "epoch": 1.43, "learning_rate": 4.284216962180103e-05, "loss": 0.9668, "step": 16390 }, { "epoch": 1.43, "learning_rate": 4.283780242815967e-05, "loss": 0.8922, "step": 16400 }, { "epoch": 1.43, "learning_rate": 4.28334352345183e-05, "loss": 0.8499, "step": 16410 }, { "epoch": 1.43, "learning_rate": 4.2829068040876935e-05, "loss": 0.8611, "step": 16420 }, { "epoch": 1.44, "learning_rate": 4.282470084723557e-05, "loss": 0.8986, "step": 16430 }, { "epoch": 1.44, "learning_rate": 4.28203336535942e-05, "loss": 0.8972, "step": 16440 }, { "epoch": 1.44, "learning_rate": 4.281596645995284e-05, "loss": 0.8521, "step": 16450 }, { "epoch": 1.44, "learning_rate": 4.281159926631147e-05, "loss": 0.8603, "step": 16460 }, { "epoch": 1.44, "learning_rate": 4.280723207267011e-05, "loss": 0.8369, "step": 16470 }, { "epoch": 1.44, "learning_rate": 4.2802864879028734e-05, "loss": 0.8814, "step": 16480 }, { "epoch": 1.44, "learning_rate": 4.2798497685387374e-05, "loss": 0.9379, "step": 16490 }, { "epoch": 1.44, "learning_rate": 4.279413049174601e-05, "loss": 0.8773, "step": 16500 }, { "epoch": 1.44, "learning_rate": 4.278976329810464e-05, "loss": 0.9398, "step": 16510 }, { "epoch": 1.44, "learning_rate": 4.2785396104463273e-05, "loss": 0.9155, "step": 16520 }, { "epoch": 1.44, "learning_rate": 4.278102891082191e-05, "loss": 0.8884, "step": 16530 }, { "epoch": 1.44, "learning_rate": 4.277666171718055e-05, "loss": 0.9837, "step": 16540 }, { "epoch": 1.45, "learning_rate": 4.277229452353917e-05, "loss": 0.9552, "step": 16550 }, { "epoch": 1.45, "learning_rate": 4.276792732989781e-05, "loss": 0.8481, "step": 16560 }, { "epoch": 1.45, "learning_rate": 4.276356013625644e-05, "loss": 0.8376, "step": 16570 }, { "epoch": 1.45, "learning_rate": 4.275919294261508e-05, "loss": 1.0289, "step": 16580 }, { "epoch": 1.45, "learning_rate": 4.2754825748973706e-05, "loss": 0.9598, "step": 16590 }, { "epoch": 1.45, "learning_rate": 4.2750458555332346e-05, "loss": 0.7642, "step": 16600 }, { "epoch": 1.45, "learning_rate": 4.274609136169098e-05, "loss": 0.9944, "step": 16610 }, { "epoch": 1.45, "learning_rate": 4.274172416804961e-05, "loss": 0.8965, "step": 16620 }, { "epoch": 1.45, "learning_rate": 4.273735697440825e-05, "loss": 0.8526, "step": 16630 }, { "epoch": 1.45, "learning_rate": 4.273298978076688e-05, "loss": 0.9839, "step": 16640 }, { "epoch": 1.45, "learning_rate": 4.272862258712552e-05, "loss": 0.9198, "step": 16650 }, { "epoch": 1.46, "learning_rate": 4.2724255393484145e-05, "loss": 0.9558, "step": 16660 }, { "epoch": 1.46, "learning_rate": 4.2719888199842785e-05, "loss": 0.9056, "step": 16670 }, { "epoch": 1.46, "learning_rate": 4.271552100620141e-05, "loss": 0.9045, "step": 16680 }, { "epoch": 1.46, "learning_rate": 4.271115381256005e-05, "loss": 0.9077, "step": 16690 }, { "epoch": 1.46, "learning_rate": 4.2706786618918685e-05, "loss": 0.9907, "step": 16700 }, { "epoch": 1.46, "learning_rate": 4.270241942527732e-05, "loss": 0.7794, "step": 16710 }, { "epoch": 1.46, "learning_rate": 4.269805223163595e-05, "loss": 0.8631, "step": 16720 }, { "epoch": 1.46, "learning_rate": 4.2693685037994584e-05, "loss": 0.8454, "step": 16730 }, { "epoch": 1.46, "learning_rate": 4.2689317844353224e-05, "loss": 0.7876, "step": 16740 }, { "epoch": 1.46, "learning_rate": 4.268495065071185e-05, "loss": 0.8867, "step": 16750 }, { "epoch": 1.46, "learning_rate": 4.268058345707049e-05, "loss": 0.8463, "step": 16760 }, { "epoch": 1.46, "learning_rate": 4.2676216263429124e-05, "loss": 0.9008, "step": 16770 }, { "epoch": 1.47, "learning_rate": 4.267184906978776e-05, "loss": 0.8707, "step": 16780 }, { "epoch": 1.47, "learning_rate": 4.266748187614639e-05, "loss": 1.0162, "step": 16790 }, { "epoch": 1.47, "learning_rate": 4.2663114682505023e-05, "loss": 0.8805, "step": 16800 }, { "epoch": 1.47, "learning_rate": 4.265874748886366e-05, "loss": 0.8647, "step": 16810 }, { "epoch": 1.47, "learning_rate": 4.265438029522229e-05, "loss": 0.8551, "step": 16820 }, { "epoch": 1.47, "learning_rate": 4.265001310158093e-05, "loss": 0.8118, "step": 16830 }, { "epoch": 1.47, "learning_rate": 4.2645645907939556e-05, "loss": 0.9197, "step": 16840 }, { "epoch": 1.47, "learning_rate": 4.2641278714298196e-05, "loss": 0.9326, "step": 16850 }, { "epoch": 1.47, "learning_rate": 4.263691152065683e-05, "loss": 0.9563, "step": 16860 }, { "epoch": 1.47, "learning_rate": 4.263254432701546e-05, "loss": 0.8319, "step": 16870 }, { "epoch": 1.47, "learning_rate": 4.2628177133374096e-05, "loss": 0.8515, "step": 16880 }, { "epoch": 1.48, "learning_rate": 4.262380993973273e-05, "loss": 0.9814, "step": 16890 }, { "epoch": 1.48, "learning_rate": 4.261944274609136e-05, "loss": 0.9255, "step": 16900 }, { "epoch": 1.48, "learning_rate": 4.2615075552449995e-05, "loss": 0.974, "step": 16910 }, { "epoch": 1.48, "learning_rate": 4.261070835880863e-05, "loss": 0.8692, "step": 16920 }, { "epoch": 1.48, "learning_rate": 4.260634116516726e-05, "loss": 0.8521, "step": 16930 }, { "epoch": 1.48, "learning_rate": 4.26019739715259e-05, "loss": 0.8625, "step": 16940 }, { "epoch": 1.48, "learning_rate": 4.2597606777884535e-05, "loss": 0.9537, "step": 16950 }, { "epoch": 1.48, "learning_rate": 4.259323958424317e-05, "loss": 0.8031, "step": 16960 }, { "epoch": 1.48, "learning_rate": 4.25888723906018e-05, "loss": 0.8392, "step": 16970 }, { "epoch": 1.48, "learning_rate": 4.2584505196960435e-05, "loss": 0.9705, "step": 16980 }, { "epoch": 1.48, "learning_rate": 4.258013800331907e-05, "loss": 0.8809, "step": 16990 }, { "epoch": 1.48, "learning_rate": 4.25757708096777e-05, "loss": 0.9855, "step": 17000 }, { "epoch": 1.49, "learning_rate": 4.2571403616036334e-05, "loss": 0.8754, "step": 17010 }, { "epoch": 1.49, "learning_rate": 4.2567036422394974e-05, "loss": 0.8633, "step": 17020 }, { "epoch": 1.49, "learning_rate": 4.256266922875361e-05, "loss": 0.9082, "step": 17030 }, { "epoch": 1.49, "learning_rate": 4.255830203511224e-05, "loss": 0.942, "step": 17040 }, { "epoch": 1.49, "learning_rate": 4.2553934841470874e-05, "loss": 0.8263, "step": 17050 }, { "epoch": 1.49, "learning_rate": 4.254956764782951e-05, "loss": 1.0218, "step": 17060 }, { "epoch": 1.49, "learning_rate": 4.254520045418814e-05, "loss": 0.9534, "step": 17070 }, { "epoch": 1.49, "learning_rate": 4.2540833260546773e-05, "loss": 0.8879, "step": 17080 }, { "epoch": 1.49, "learning_rate": 4.253646606690541e-05, "loss": 0.8308, "step": 17090 }, { "epoch": 1.49, "learning_rate": 4.253209887326404e-05, "loss": 0.9103, "step": 17100 }, { "epoch": 1.49, "learning_rate": 4.252773167962268e-05, "loss": 0.8881, "step": 17110 }, { "epoch": 1.5, "learning_rate": 4.2523364485981306e-05, "loss": 0.9397, "step": 17120 }, { "epoch": 1.5, "learning_rate": 4.2518997292339946e-05, "loss": 0.8297, "step": 17130 }, { "epoch": 1.5, "learning_rate": 4.251463009869858e-05, "loss": 0.8779, "step": 17140 }, { "epoch": 1.5, "learning_rate": 4.251026290505721e-05, "loss": 0.9187, "step": 17150 }, { "epoch": 1.5, "learning_rate": 4.2505895711415846e-05, "loss": 0.9123, "step": 17160 }, { "epoch": 1.5, "learning_rate": 4.250152851777448e-05, "loss": 0.8494, "step": 17170 }, { "epoch": 1.5, "learning_rate": 4.249716132413312e-05, "loss": 0.9272, "step": 17180 }, { "epoch": 1.5, "learning_rate": 4.2492794130491745e-05, "loss": 1.0028, "step": 17190 }, { "epoch": 1.5, "learning_rate": 4.2488426936850385e-05, "loss": 0.8532, "step": 17200 }, { "epoch": 1.5, "learning_rate": 4.248405974320901e-05, "loss": 0.9382, "step": 17210 }, { "epoch": 1.5, "learning_rate": 4.247969254956765e-05, "loss": 1.0165, "step": 17220 }, { "epoch": 1.5, "learning_rate": 4.2475325355926285e-05, "loss": 0.9373, "step": 17230 }, { "epoch": 1.51, "learning_rate": 4.247095816228492e-05, "loss": 0.7885, "step": 17240 }, { "epoch": 1.51, "learning_rate": 4.246659096864355e-05, "loss": 0.873, "step": 17250 }, { "epoch": 1.51, "learning_rate": 4.2462223775002185e-05, "loss": 1.0064, "step": 17260 }, { "epoch": 1.51, "learning_rate": 4.2457856581360825e-05, "loss": 0.9196, "step": 17270 }, { "epoch": 1.51, "learning_rate": 4.245348938771945e-05, "loss": 0.9122, "step": 17280 }, { "epoch": 1.51, "learning_rate": 4.244912219407809e-05, "loss": 0.8801, "step": 17290 }, { "epoch": 1.51, "learning_rate": 4.244475500043672e-05, "loss": 1.0196, "step": 17300 }, { "epoch": 1.51, "learning_rate": 4.244038780679536e-05, "loss": 0.9083, "step": 17310 }, { "epoch": 1.51, "learning_rate": 4.2436020613153984e-05, "loss": 0.8368, "step": 17320 }, { "epoch": 1.51, "learning_rate": 4.2431653419512624e-05, "loss": 0.9514, "step": 17330 }, { "epoch": 1.51, "learning_rate": 4.242728622587126e-05, "loss": 0.9155, "step": 17340 }, { "epoch": 1.52, "learning_rate": 4.242291903222989e-05, "loss": 0.9076, "step": 17350 }, { "epoch": 1.52, "learning_rate": 4.241855183858853e-05, "loss": 0.9341, "step": 17360 }, { "epoch": 1.52, "learning_rate": 4.241418464494716e-05, "loss": 1.0474, "step": 17370 }, { "epoch": 1.52, "learning_rate": 4.24098174513058e-05, "loss": 0.8937, "step": 17380 }, { "epoch": 1.52, "learning_rate": 4.240545025766442e-05, "loss": 0.8946, "step": 17390 }, { "epoch": 1.52, "learning_rate": 4.240108306402306e-05, "loss": 0.8847, "step": 17400 }, { "epoch": 1.52, "learning_rate": 4.239671587038169e-05, "loss": 0.943, "step": 17410 }, { "epoch": 1.52, "learning_rate": 4.239234867674033e-05, "loss": 0.9523, "step": 17420 }, { "epoch": 1.52, "learning_rate": 4.238798148309896e-05, "loss": 0.8895, "step": 17430 }, { "epoch": 1.52, "learning_rate": 4.2383614289457596e-05, "loss": 0.9144, "step": 17440 }, { "epoch": 1.52, "learning_rate": 4.237924709581623e-05, "loss": 0.877, "step": 17450 }, { "epoch": 1.53, "learning_rate": 4.237487990217486e-05, "loss": 0.8782, "step": 17460 }, { "epoch": 1.53, "learning_rate": 4.23705127085335e-05, "loss": 0.9803, "step": 17470 }, { "epoch": 1.53, "learning_rate": 4.236614551489213e-05, "loss": 0.9307, "step": 17480 }, { "epoch": 1.53, "learning_rate": 4.236177832125077e-05, "loss": 0.8361, "step": 17490 }, { "epoch": 1.53, "learning_rate": 4.2357411127609395e-05, "loss": 0.884, "step": 17500 }, { "epoch": 1.53, "learning_rate": 4.2353043933968035e-05, "loss": 0.909, "step": 17510 }, { "epoch": 1.53, "learning_rate": 4.234867674032667e-05, "loss": 0.857, "step": 17520 }, { "epoch": 1.53, "learning_rate": 4.23443095466853e-05, "loss": 0.8743, "step": 17530 }, { "epoch": 1.53, "learning_rate": 4.2339942353043935e-05, "loss": 0.8617, "step": 17540 }, { "epoch": 1.53, "learning_rate": 4.233557515940257e-05, "loss": 0.9176, "step": 17550 }, { "epoch": 1.53, "learning_rate": 4.233120796576121e-05, "loss": 1.0353, "step": 17560 }, { "epoch": 1.53, "learning_rate": 4.2326840772119834e-05, "loss": 0.8611, "step": 17570 }, { "epoch": 1.54, "learning_rate": 4.2322473578478474e-05, "loss": 0.9295, "step": 17580 }, { "epoch": 1.54, "learning_rate": 4.231810638483711e-05, "loss": 0.8975, "step": 17590 }, { "epoch": 1.54, "learning_rate": 4.231373919119574e-05, "loss": 0.9232, "step": 17600 }, { "epoch": 1.54, "learning_rate": 4.2309371997554374e-05, "loss": 0.9378, "step": 17610 }, { "epoch": 1.54, "learning_rate": 4.230500480391301e-05, "loss": 0.8603, "step": 17620 }, { "epoch": 1.54, "learning_rate": 4.230063761027164e-05, "loss": 0.8299, "step": 17630 }, { "epoch": 1.54, "learning_rate": 4.2296270416630273e-05, "loss": 0.9498, "step": 17640 }, { "epoch": 1.54, "learning_rate": 4.229190322298891e-05, "loss": 0.9334, "step": 17650 }, { "epoch": 1.54, "learning_rate": 4.228753602934754e-05, "loss": 0.922, "step": 17660 }, { "epoch": 1.54, "learning_rate": 4.228316883570618e-05, "loss": 0.8291, "step": 17670 }, { "epoch": 1.54, "learning_rate": 4.227880164206481e-05, "loss": 0.9326, "step": 17680 }, { "epoch": 1.55, "learning_rate": 4.2274434448423446e-05, "loss": 0.9288, "step": 17690 }, { "epoch": 1.55, "learning_rate": 4.227006725478208e-05, "loss": 0.9133, "step": 17700 }, { "epoch": 1.55, "learning_rate": 4.226570006114071e-05, "loss": 1.0129, "step": 17710 }, { "epoch": 1.55, "learning_rate": 4.2261332867499346e-05, "loss": 0.9741, "step": 17720 }, { "epoch": 1.55, "learning_rate": 4.225696567385798e-05, "loss": 0.8641, "step": 17730 }, { "epoch": 1.55, "learning_rate": 4.225259848021661e-05, "loss": 0.9729, "step": 17740 }, { "epoch": 1.55, "learning_rate": 4.2248231286575245e-05, "loss": 0.8942, "step": 17750 }, { "epoch": 1.55, "learning_rate": 4.2243864092933885e-05, "loss": 0.8583, "step": 17760 }, { "epoch": 1.55, "learning_rate": 4.223949689929252e-05, "loss": 0.83, "step": 17770 }, { "epoch": 1.55, "learning_rate": 4.223512970565115e-05, "loss": 1.019, "step": 17780 }, { "epoch": 1.55, "learning_rate": 4.2230762512009785e-05, "loss": 0.9381, "step": 17790 }, { "epoch": 1.55, "learning_rate": 4.222639531836842e-05, "loss": 0.8856, "step": 17800 }, { "epoch": 1.56, "learning_rate": 4.222202812472705e-05, "loss": 0.9126, "step": 17810 }, { "epoch": 1.56, "learning_rate": 4.2217660931085685e-05, "loss": 0.8096, "step": 17820 }, { "epoch": 1.56, "learning_rate": 4.221329373744432e-05, "loss": 0.8637, "step": 17830 }, { "epoch": 1.56, "learning_rate": 4.220892654380296e-05, "loss": 0.7979, "step": 17840 }, { "epoch": 1.56, "learning_rate": 4.220455935016159e-05, "loss": 0.8867, "step": 17850 }, { "epoch": 1.56, "learning_rate": 4.2200192156520224e-05, "loss": 0.9575, "step": 17860 }, { "epoch": 1.56, "learning_rate": 4.219582496287886e-05, "loss": 0.8894, "step": 17870 }, { "epoch": 1.56, "learning_rate": 4.219145776923749e-05, "loss": 0.8141, "step": 17880 }, { "epoch": 1.56, "learning_rate": 4.2187090575596124e-05, "loss": 0.9643, "step": 17890 }, { "epoch": 1.56, "learning_rate": 4.218272338195476e-05, "loss": 0.8852, "step": 17900 }, { "epoch": 1.56, "learning_rate": 4.217835618831339e-05, "loss": 0.9744, "step": 17910 }, { "epoch": 1.57, "learning_rate": 4.2173988994672023e-05, "loss": 0.87, "step": 17920 }, { "epoch": 1.57, "learning_rate": 4.2169621801030663e-05, "loss": 0.8538, "step": 17930 }, { "epoch": 1.57, "learning_rate": 4.216525460738929e-05, "loss": 0.9016, "step": 17940 }, { "epoch": 1.57, "learning_rate": 4.216088741374793e-05, "loss": 0.8854, "step": 17950 }, { "epoch": 1.57, "learning_rate": 4.215652022010656e-05, "loss": 0.925, "step": 17960 }, { "epoch": 1.57, "learning_rate": 4.2152153026465196e-05, "loss": 0.8494, "step": 17970 }, { "epoch": 1.57, "learning_rate": 4.214778583282383e-05, "loss": 0.8484, "step": 17980 }, { "epoch": 1.57, "learning_rate": 4.214341863918246e-05, "loss": 0.9416, "step": 17990 }, { "epoch": 1.57, "learning_rate": 4.21390514455411e-05, "loss": 0.8182, "step": 18000 }, { "epoch": 1.57, "learning_rate": 4.213468425189973e-05, "loss": 0.8701, "step": 18010 }, { "epoch": 1.57, "learning_rate": 4.213031705825837e-05, "loss": 0.9982, "step": 18020 }, { "epoch": 1.57, "learning_rate": 4.2125949864616996e-05, "loss": 0.8275, "step": 18030 }, { "epoch": 1.58, "learning_rate": 4.2121582670975635e-05, "loss": 0.9125, "step": 18040 }, { "epoch": 1.58, "learning_rate": 4.211721547733427e-05, "loss": 0.845, "step": 18050 }, { "epoch": 1.58, "learning_rate": 4.21128482836929e-05, "loss": 0.8504, "step": 18060 }, { "epoch": 1.58, "learning_rate": 4.2108481090051535e-05, "loss": 0.8587, "step": 18070 }, { "epoch": 1.58, "learning_rate": 4.210411389641017e-05, "loss": 0.9246, "step": 18080 }, { "epoch": 1.58, "learning_rate": 4.209974670276881e-05, "loss": 1.0316, "step": 18090 }, { "epoch": 1.58, "learning_rate": 4.2095379509127435e-05, "loss": 0.8776, "step": 18100 }, { "epoch": 1.58, "learning_rate": 4.2091012315486075e-05, "loss": 0.8518, "step": 18110 }, { "epoch": 1.58, "learning_rate": 4.20866451218447e-05, "loss": 0.9769, "step": 18120 }, { "epoch": 1.58, "learning_rate": 4.208227792820334e-05, "loss": 0.8358, "step": 18130 }, { "epoch": 1.58, "learning_rate": 4.207791073456197e-05, "loss": 0.9775, "step": 18140 }, { "epoch": 1.59, "learning_rate": 4.207354354092061e-05, "loss": 0.9359, "step": 18150 }, { "epoch": 1.59, "learning_rate": 4.206917634727924e-05, "loss": 0.9132, "step": 18160 }, { "epoch": 1.59, "learning_rate": 4.2064809153637874e-05, "loss": 0.9181, "step": 18170 }, { "epoch": 1.59, "learning_rate": 4.2060441959996514e-05, "loss": 0.8262, "step": 18180 }, { "epoch": 1.59, "learning_rate": 4.205607476635514e-05, "loss": 0.8896, "step": 18190 }, { "epoch": 1.59, "learning_rate": 4.205170757271378e-05, "loss": 0.9324, "step": 18200 }, { "epoch": 1.59, "learning_rate": 4.204734037907241e-05, "loss": 0.9263, "step": 18210 }, { "epoch": 1.59, "learning_rate": 4.204297318543105e-05, "loss": 0.9152, "step": 18220 }, { "epoch": 1.59, "learning_rate": 4.203860599178967e-05, "loss": 0.9595, "step": 18230 }, { "epoch": 1.59, "learning_rate": 4.203423879814831e-05, "loss": 1.0747, "step": 18240 }, { "epoch": 1.59, "learning_rate": 4.2029871604506946e-05, "loss": 0.9535, "step": 18250 }, { "epoch": 1.59, "learning_rate": 4.202550441086558e-05, "loss": 0.8782, "step": 18260 }, { "epoch": 1.6, "learning_rate": 4.202113721722421e-05, "loss": 0.904, "step": 18270 }, { "epoch": 1.6, "learning_rate": 4.2016770023582846e-05, "loss": 0.857, "step": 18280 }, { "epoch": 1.6, "learning_rate": 4.2012402829941486e-05, "loss": 1.0048, "step": 18290 }, { "epoch": 1.6, "learning_rate": 4.200803563630011e-05, "loss": 0.8859, "step": 18300 }, { "epoch": 1.6, "learning_rate": 4.200366844265875e-05, "loss": 0.8749, "step": 18310 }, { "epoch": 1.6, "learning_rate": 4.199930124901738e-05, "loss": 0.88, "step": 18320 }, { "epoch": 1.6, "learning_rate": 4.199493405537602e-05, "loss": 0.8392, "step": 18330 }, { "epoch": 1.6, "learning_rate": 4.199056686173465e-05, "loss": 0.98, "step": 18340 }, { "epoch": 1.6, "learning_rate": 4.1986199668093285e-05, "loss": 0.8203, "step": 18350 }, { "epoch": 1.6, "learning_rate": 4.198183247445192e-05, "loss": 1.013, "step": 18360 }, { "epoch": 1.6, "learning_rate": 4.197746528081055e-05, "loss": 0.8189, "step": 18370 }, { "epoch": 1.61, "learning_rate": 4.197309808716919e-05, "loss": 0.8422, "step": 18380 }, { "epoch": 1.61, "learning_rate": 4.196873089352782e-05, "loss": 0.9924, "step": 18390 }, { "epoch": 1.61, "learning_rate": 4.196436369988646e-05, "loss": 0.8456, "step": 18400 }, { "epoch": 1.61, "learning_rate": 4.1959996506245084e-05, "loss": 0.9858, "step": 18410 }, { "epoch": 1.61, "learning_rate": 4.1955629312603724e-05, "loss": 0.8816, "step": 18420 }, { "epoch": 1.61, "learning_rate": 4.195126211896236e-05, "loss": 0.9604, "step": 18430 }, { "epoch": 1.61, "learning_rate": 4.194689492532099e-05, "loss": 0.9552, "step": 18440 }, { "epoch": 1.61, "learning_rate": 4.1942527731679624e-05, "loss": 0.876, "step": 18450 }, { "epoch": 1.61, "learning_rate": 4.193816053803826e-05, "loss": 0.8493, "step": 18460 }, { "epoch": 1.61, "learning_rate": 4.193379334439689e-05, "loss": 0.9565, "step": 18470 }, { "epoch": 1.61, "learning_rate": 4.1929426150755524e-05, "loss": 0.845, "step": 18480 }, { "epoch": 1.61, "learning_rate": 4.1925058957114163e-05, "loss": 0.8699, "step": 18490 }, { "epoch": 1.62, "learning_rate": 4.19206917634728e-05, "loss": 0.8708, "step": 18500 }, { "epoch": 1.62, "learning_rate": 4.191632456983143e-05, "loss": 0.8965, "step": 18510 }, { "epoch": 1.62, "learning_rate": 4.191195737619006e-05, "loss": 0.8135, "step": 18520 }, { "epoch": 1.62, "learning_rate": 4.1907590182548696e-05, "loss": 0.8992, "step": 18530 }, { "epoch": 1.62, "learning_rate": 4.190322298890733e-05, "loss": 0.9404, "step": 18540 }, { "epoch": 1.62, "learning_rate": 4.189885579526596e-05, "loss": 0.7823, "step": 18550 }, { "epoch": 1.62, "learning_rate": 4.1894488601624596e-05, "loss": 0.9082, "step": 18560 }, { "epoch": 1.62, "learning_rate": 4.189012140798323e-05, "loss": 0.9906, "step": 18570 }, { "epoch": 1.62, "learning_rate": 4.188575421434187e-05, "loss": 0.9024, "step": 18580 }, { "epoch": 1.62, "learning_rate": 4.18813870207005e-05, "loss": 0.836, "step": 18590 }, { "epoch": 1.62, "learning_rate": 4.1877019827059136e-05, "loss": 0.7802, "step": 18600 }, { "epoch": 1.63, "learning_rate": 4.187265263341777e-05, "loss": 0.8276, "step": 18610 }, { "epoch": 1.63, "learning_rate": 4.18682854397764e-05, "loss": 0.9457, "step": 18620 }, { "epoch": 1.63, "learning_rate": 4.1863918246135035e-05, "loss": 0.9752, "step": 18630 }, { "epoch": 1.63, "learning_rate": 4.185955105249367e-05, "loss": 0.9981, "step": 18640 }, { "epoch": 1.63, "learning_rate": 4.18551838588523e-05, "loss": 0.9319, "step": 18650 }, { "epoch": 1.63, "learning_rate": 4.185081666521094e-05, "loss": 0.9295, "step": 18660 }, { "epoch": 1.63, "learning_rate": 4.184644947156957e-05, "loss": 0.9695, "step": 18670 }, { "epoch": 1.63, "learning_rate": 4.184208227792821e-05, "loss": 0.9095, "step": 18680 }, { "epoch": 1.63, "learning_rate": 4.183771508428684e-05, "loss": 0.9705, "step": 18690 }, { "epoch": 1.63, "learning_rate": 4.1833347890645474e-05, "loss": 0.901, "step": 18700 }, { "epoch": 1.63, "learning_rate": 4.182898069700411e-05, "loss": 0.8911, "step": 18710 }, { "epoch": 1.64, "learning_rate": 4.182461350336274e-05, "loss": 0.8377, "step": 18720 }, { "epoch": 1.64, "learning_rate": 4.1820246309721374e-05, "loss": 0.9186, "step": 18730 }, { "epoch": 1.64, "learning_rate": 4.181587911608001e-05, "loss": 0.8603, "step": 18740 }, { "epoch": 1.64, "learning_rate": 4.181151192243865e-05, "loss": 0.8852, "step": 18750 }, { "epoch": 1.64, "learning_rate": 4.1807144728797274e-05, "loss": 1.0386, "step": 18760 }, { "epoch": 1.64, "learning_rate": 4.1802777535155914e-05, "loss": 0.9363, "step": 18770 }, { "epoch": 1.64, "learning_rate": 4.179841034151455e-05, "loss": 0.9045, "step": 18780 }, { "epoch": 1.64, "learning_rate": 4.179404314787318e-05, "loss": 0.9825, "step": 18790 }, { "epoch": 1.64, "learning_rate": 4.178967595423181e-05, "loss": 0.8761, "step": 18800 }, { "epoch": 1.64, "learning_rate": 4.1785308760590446e-05, "loss": 0.8271, "step": 18810 }, { "epoch": 1.64, "learning_rate": 4.178094156694908e-05, "loss": 0.8955, "step": 18820 }, { "epoch": 1.64, "learning_rate": 4.177657437330771e-05, "loss": 0.9342, "step": 18830 }, { "epoch": 1.65, "learning_rate": 4.177220717966635e-05, "loss": 0.8671, "step": 18840 }, { "epoch": 1.65, "learning_rate": 4.176783998602498e-05, "loss": 0.8368, "step": 18850 }, { "epoch": 1.65, "learning_rate": 4.176347279238362e-05, "loss": 1.0078, "step": 18860 }, { "epoch": 1.65, "learning_rate": 4.1759105598742246e-05, "loss": 0.8837, "step": 18870 }, { "epoch": 1.65, "learning_rate": 4.1754738405100886e-05, "loss": 0.9034, "step": 18880 }, { "epoch": 1.65, "learning_rate": 4.175037121145952e-05, "loss": 0.8497, "step": 18890 }, { "epoch": 1.65, "learning_rate": 4.174600401781815e-05, "loss": 0.8728, "step": 18900 }, { "epoch": 1.65, "learning_rate": 4.174163682417679e-05, "loss": 0.9399, "step": 18910 }, { "epoch": 1.65, "learning_rate": 4.173726963053542e-05, "loss": 1.0085, "step": 18920 }, { "epoch": 1.65, "learning_rate": 4.173290243689406e-05, "loss": 0.9641, "step": 18930 }, { "epoch": 1.65, "learning_rate": 4.1728535243252685e-05, "loss": 0.8724, "step": 18940 }, { "epoch": 1.66, "learning_rate": 4.1724168049611325e-05, "loss": 0.885, "step": 18950 }, { "epoch": 1.66, "learning_rate": 4.171980085596995e-05, "loss": 0.8666, "step": 18960 }, { "epoch": 1.66, "learning_rate": 4.171543366232859e-05, "loss": 0.885, "step": 18970 }, { "epoch": 1.66, "learning_rate": 4.1711066468687224e-05, "loss": 0.8987, "step": 18980 }, { "epoch": 1.66, "learning_rate": 4.170669927504586e-05, "loss": 0.8221, "step": 18990 }, { "epoch": 1.66, "learning_rate": 4.170233208140449e-05, "loss": 0.9286, "step": 19000 }, { "epoch": 1.66, "learning_rate": 4.1697964887763124e-05, "loss": 1.017, "step": 19010 }, { "epoch": 1.66, "learning_rate": 4.1693597694121764e-05, "loss": 0.8988, "step": 19020 }, { "epoch": 1.66, "learning_rate": 4.168923050048039e-05, "loss": 0.8287, "step": 19030 }, { "epoch": 1.66, "learning_rate": 4.168486330683903e-05, "loss": 0.8693, "step": 19040 }, { "epoch": 1.66, "learning_rate": 4.168049611319766e-05, "loss": 0.9094, "step": 19050 }, { "epoch": 1.66, "learning_rate": 4.16761289195563e-05, "loss": 0.8359, "step": 19060 }, { "epoch": 1.67, "learning_rate": 4.167176172591493e-05, "loss": 0.9595, "step": 19070 }, { "epoch": 1.67, "learning_rate": 4.166739453227356e-05, "loss": 0.882, "step": 19080 }, { "epoch": 1.67, "learning_rate": 4.1663027338632196e-05, "loss": 1.0107, "step": 19090 }, { "epoch": 1.67, "learning_rate": 4.165866014499083e-05, "loss": 0.8231, "step": 19100 }, { "epoch": 1.67, "learning_rate": 4.165429295134947e-05, "loss": 0.7482, "step": 19110 }, { "epoch": 1.67, "learning_rate": 4.1649925757708096e-05, "loss": 0.84, "step": 19120 }, { "epoch": 1.67, "learning_rate": 4.1645558564066736e-05, "loss": 1.0007, "step": 19130 }, { "epoch": 1.67, "learning_rate": 4.164119137042536e-05, "loss": 0.8836, "step": 19140 }, { "epoch": 1.67, "learning_rate": 4.1636824176784e-05, "loss": 0.8767, "step": 19150 }, { "epoch": 1.67, "learning_rate": 4.1632456983142636e-05, "loss": 0.8825, "step": 19160 }, { "epoch": 1.67, "learning_rate": 4.162808978950127e-05, "loss": 0.8629, "step": 19170 }, { "epoch": 1.68, "learning_rate": 4.16237225958599e-05, "loss": 0.8175, "step": 19180 }, { "epoch": 1.68, "learning_rate": 4.1619355402218535e-05, "loss": 0.9335, "step": 19190 }, { "epoch": 1.68, "learning_rate": 4.161498820857717e-05, "loss": 0.9198, "step": 19200 }, { "epoch": 1.68, "learning_rate": 4.16106210149358e-05, "loss": 0.8916, "step": 19210 }, { "epoch": 1.68, "learning_rate": 4.160625382129444e-05, "loss": 0.9295, "step": 19220 }, { "epoch": 1.68, "learning_rate": 4.160188662765307e-05, "loss": 0.9003, "step": 19230 }, { "epoch": 1.68, "learning_rate": 4.159751943401171e-05, "loss": 0.7715, "step": 19240 }, { "epoch": 1.68, "learning_rate": 4.159315224037034e-05, "loss": 0.9378, "step": 19250 }, { "epoch": 1.68, "learning_rate": 4.1588785046728974e-05, "loss": 0.847, "step": 19260 }, { "epoch": 1.68, "learning_rate": 4.158441785308761e-05, "loss": 0.8368, "step": 19270 }, { "epoch": 1.68, "learning_rate": 4.158005065944624e-05, "loss": 0.8627, "step": 19280 }, { "epoch": 1.68, "learning_rate": 4.1575683465804874e-05, "loss": 0.907, "step": 19290 }, { "epoch": 1.69, "learning_rate": 4.157131627216351e-05, "loss": 0.8004, "step": 19300 }, { "epoch": 1.69, "learning_rate": 4.156694907852215e-05, "loss": 0.9652, "step": 19310 }, { "epoch": 1.69, "learning_rate": 4.156258188488078e-05, "loss": 0.8974, "step": 19320 }, { "epoch": 1.69, "learning_rate": 4.1558214691239414e-05, "loss": 0.9365, "step": 19330 }, { "epoch": 1.69, "learning_rate": 4.155384749759805e-05, "loss": 0.8668, "step": 19340 }, { "epoch": 1.69, "learning_rate": 4.154948030395668e-05, "loss": 0.9372, "step": 19350 }, { "epoch": 1.69, "learning_rate": 4.154511311031531e-05, "loss": 0.9489, "step": 19360 }, { "epoch": 1.69, "learning_rate": 4.1540745916673946e-05, "loss": 0.8398, "step": 19370 }, { "epoch": 1.69, "learning_rate": 4.153637872303258e-05, "loss": 0.8952, "step": 19380 }, { "epoch": 1.69, "learning_rate": 4.153201152939121e-05, "loss": 0.927, "step": 19390 }, { "epoch": 1.69, "learning_rate": 4.1527644335749846e-05, "loss": 1.0132, "step": 19400 }, { "epoch": 1.7, "learning_rate": 4.1523277142108486e-05, "loss": 0.8372, "step": 19410 }, { "epoch": 1.7, "learning_rate": 4.151890994846712e-05, "loss": 0.7357, "step": 19420 }, { "epoch": 1.7, "learning_rate": 4.151454275482575e-05, "loss": 0.9532, "step": 19430 }, { "epoch": 1.7, "learning_rate": 4.1510175561184386e-05, "loss": 0.933, "step": 19440 }, { "epoch": 1.7, "learning_rate": 4.150580836754302e-05, "loss": 0.881, "step": 19450 }, { "epoch": 1.7, "learning_rate": 4.150144117390165e-05, "loss": 0.8622, "step": 19460 }, { "epoch": 1.7, "learning_rate": 4.1497073980260285e-05, "loss": 0.7896, "step": 19470 }, { "epoch": 1.7, "learning_rate": 4.1492706786618925e-05, "loss": 0.9075, "step": 19480 }, { "epoch": 1.7, "learning_rate": 4.148833959297755e-05, "loss": 0.927, "step": 19490 }, { "epoch": 1.7, "learning_rate": 4.148397239933619e-05, "loss": 0.7989, "step": 19500 }, { "epoch": 1.7, "learning_rate": 4.1479605205694825e-05, "loss": 0.8322, "step": 19510 }, { "epoch": 1.7, "learning_rate": 4.147523801205346e-05, "loss": 0.8631, "step": 19520 }, { "epoch": 1.71, "learning_rate": 4.147087081841209e-05, "loss": 0.951, "step": 19530 }, { "epoch": 1.71, "learning_rate": 4.1466503624770724e-05, "loss": 0.9259, "step": 19540 }, { "epoch": 1.71, "learning_rate": 4.146213643112936e-05, "loss": 0.8473, "step": 19550 }, { "epoch": 1.71, "learning_rate": 4.145776923748799e-05, "loss": 0.8849, "step": 19560 }, { "epoch": 1.71, "learning_rate": 4.145340204384663e-05, "loss": 0.9134, "step": 19570 }, { "epoch": 1.71, "learning_rate": 4.144903485020526e-05, "loss": 0.8946, "step": 19580 }, { "epoch": 1.71, "learning_rate": 4.14446676565639e-05, "loss": 0.8402, "step": 19590 }, { "epoch": 1.71, "learning_rate": 4.1440300462922524e-05, "loss": 0.8189, "step": 19600 }, { "epoch": 1.71, "learning_rate": 4.1435933269281164e-05, "loss": 0.9458, "step": 19610 }, { "epoch": 1.71, "learning_rate": 4.14315660756398e-05, "loss": 0.8323, "step": 19620 }, { "epoch": 1.71, "learning_rate": 4.142719888199843e-05, "loss": 0.8898, "step": 19630 }, { "epoch": 1.72, "learning_rate": 4.142283168835706e-05, "loss": 0.9271, "step": 19640 }, { "epoch": 1.72, "learning_rate": 4.1418464494715696e-05, "loss": 0.815, "step": 19650 }, { "epoch": 1.72, "learning_rate": 4.1414097301074336e-05, "loss": 0.8342, "step": 19660 }, { "epoch": 1.72, "learning_rate": 4.140973010743296e-05, "loss": 0.801, "step": 19670 }, { "epoch": 1.72, "learning_rate": 4.14053629137916e-05, "loss": 0.9814, "step": 19680 }, { "epoch": 1.72, "learning_rate": 4.140099572015023e-05, "loss": 0.8655, "step": 19690 }, { "epoch": 1.72, "learning_rate": 4.139662852650887e-05, "loss": 0.9116, "step": 19700 }, { "epoch": 1.72, "learning_rate": 4.13922613328675e-05, "loss": 0.9432, "step": 19710 }, { "epoch": 1.72, "learning_rate": 4.1387894139226136e-05, "loss": 0.9369, "step": 19720 }, { "epoch": 1.72, "learning_rate": 4.138352694558477e-05, "loss": 0.8762, "step": 19730 }, { "epoch": 1.72, "learning_rate": 4.13791597519434e-05, "loss": 0.9253, "step": 19740 }, { "epoch": 1.73, "learning_rate": 4.137479255830204e-05, "loss": 0.7886, "step": 19750 }, { "epoch": 1.73, "learning_rate": 4.137042536466067e-05, "loss": 0.9707, "step": 19760 }, { "epoch": 1.73, "learning_rate": 4.136605817101931e-05, "loss": 0.8852, "step": 19770 }, { "epoch": 1.73, "learning_rate": 4.1361690977377935e-05, "loss": 0.8879, "step": 19780 }, { "epoch": 1.73, "learning_rate": 4.1357323783736575e-05, "loss": 0.9292, "step": 19790 }, { "epoch": 1.73, "learning_rate": 4.13529565900952e-05, "loss": 0.823, "step": 19800 }, { "epoch": 1.73, "learning_rate": 4.134858939645384e-05, "loss": 0.9564, "step": 19810 }, { "epoch": 1.73, "learning_rate": 4.1344222202812474e-05, "loss": 0.9664, "step": 19820 }, { "epoch": 1.73, "learning_rate": 4.133985500917111e-05, "loss": 0.8088, "step": 19830 }, { "epoch": 1.73, "learning_rate": 4.133548781552975e-05, "loss": 0.8775, "step": 19840 }, { "epoch": 1.73, "learning_rate": 4.1331120621888374e-05, "loss": 0.8847, "step": 19850 }, { "epoch": 1.73, "learning_rate": 4.1326753428247014e-05, "loss": 0.9577, "step": 19860 }, { "epoch": 1.74, "learning_rate": 4.132238623460564e-05, "loss": 0.8617, "step": 19870 }, { "epoch": 1.74, "learning_rate": 4.131801904096428e-05, "loss": 0.9295, "step": 19880 }, { "epoch": 1.74, "learning_rate": 4.1313651847322914e-05, "loss": 0.8586, "step": 19890 }, { "epoch": 1.74, "learning_rate": 4.130928465368155e-05, "loss": 0.9147, "step": 19900 }, { "epoch": 1.74, "learning_rate": 4.130491746004018e-05, "loss": 0.859, "step": 19910 }, { "epoch": 1.74, "learning_rate": 4.130055026639881e-05, "loss": 0.9751, "step": 19920 }, { "epoch": 1.74, "learning_rate": 4.1296183072757446e-05, "loss": 1.0088, "step": 19930 }, { "epoch": 1.74, "learning_rate": 4.129181587911608e-05, "loss": 0.9525, "step": 19940 }, { "epoch": 1.74, "learning_rate": 4.128744868547472e-05, "loss": 0.8735, "step": 19950 }, { "epoch": 1.74, "learning_rate": 4.1283081491833346e-05, "loss": 1.0307, "step": 19960 }, { "epoch": 1.74, "learning_rate": 4.1278714298191986e-05, "loss": 0.8379, "step": 19970 }, { "epoch": 1.75, "learning_rate": 4.127434710455062e-05, "loss": 0.899, "step": 19980 }, { "epoch": 1.75, "learning_rate": 4.126997991090925e-05, "loss": 0.8703, "step": 19990 }, { "epoch": 1.75, "learning_rate": 4.1265612717267886e-05, "loss": 1.0364, "step": 20000 }, { "epoch": 1.75, "eval_accuracy": 0.574629065539943, "eval_loss": 0.9053159356117249, "eval_runtime": 84.1188, "eval_samples_per_second": 120.984, "eval_steps_per_second": 15.133, "step": 20000 }, { "epoch": 1.75, "learning_rate": 4.126124552362652e-05, "loss": 0.8943, "step": 20010 }, { "epoch": 1.75, "learning_rate": 4.125687832998515e-05, "loss": 0.8525, "step": 20020 }, { "epoch": 1.75, "learning_rate": 4.1252511136343785e-05, "loss": 0.9, "step": 20030 }, { "epoch": 1.75, "learning_rate": 4.1248143942702425e-05, "loss": 0.8541, "step": 20040 }, { "epoch": 1.75, "learning_rate": 4.124377674906105e-05, "loss": 0.821, "step": 20050 }, { "epoch": 1.75, "learning_rate": 4.123940955541969e-05, "loss": 0.8161, "step": 20060 }, { "epoch": 1.75, "learning_rate": 4.1235042361778325e-05, "loss": 0.8613, "step": 20070 }, { "epoch": 1.75, "learning_rate": 4.123067516813696e-05, "loss": 0.8843, "step": 20080 }, { "epoch": 1.75, "learning_rate": 4.122630797449559e-05, "loss": 0.8546, "step": 20090 }, { "epoch": 1.76, "learning_rate": 4.1221940780854224e-05, "loss": 0.8802, "step": 20100 }, { "epoch": 1.76, "learning_rate": 4.121757358721286e-05, "loss": 0.8566, "step": 20110 }, { "epoch": 1.76, "learning_rate": 4.121320639357149e-05, "loss": 0.8757, "step": 20120 }, { "epoch": 1.76, "learning_rate": 4.1208839199930124e-05, "loss": 0.9832, "step": 20130 }, { "epoch": 1.76, "learning_rate": 4.1204472006288764e-05, "loss": 0.8816, "step": 20140 }, { "epoch": 1.76, "learning_rate": 4.12001048126474e-05, "loss": 0.7858, "step": 20150 }, { "epoch": 1.76, "learning_rate": 4.119573761900603e-05, "loss": 0.9879, "step": 20160 }, { "epoch": 1.76, "learning_rate": 4.1191370425364664e-05, "loss": 1.0506, "step": 20170 }, { "epoch": 1.76, "learning_rate": 4.11870032317233e-05, "loss": 0.9166, "step": 20180 }, { "epoch": 1.76, "learning_rate": 4.118263603808193e-05, "loss": 0.8969, "step": 20190 }, { "epoch": 1.76, "learning_rate": 4.117826884444056e-05, "loss": 0.7902, "step": 20200 }, { "epoch": 1.77, "learning_rate": 4.1173901650799196e-05, "loss": 0.9282, "step": 20210 }, { "epoch": 1.77, "learning_rate": 4.116953445715783e-05, "loss": 0.8329, "step": 20220 }, { "epoch": 1.77, "learning_rate": 4.116516726351647e-05, "loss": 1.0167, "step": 20230 }, { "epoch": 1.77, "learning_rate": 4.11608000698751e-05, "loss": 0.9792, "step": 20240 }, { "epoch": 1.77, "learning_rate": 4.1156432876233736e-05, "loss": 0.9326, "step": 20250 }, { "epoch": 1.77, "learning_rate": 4.115206568259237e-05, "loss": 0.9197, "step": 20260 }, { "epoch": 1.77, "learning_rate": 4.1147698488951e-05, "loss": 0.8049, "step": 20270 }, { "epoch": 1.77, "learning_rate": 4.1143331295309636e-05, "loss": 0.9345, "step": 20280 }, { "epoch": 1.77, "learning_rate": 4.113896410166827e-05, "loss": 0.8331, "step": 20290 }, { "epoch": 1.77, "learning_rate": 4.11345969080269e-05, "loss": 1.0869, "step": 20300 }, { "epoch": 1.77, "learning_rate": 4.1130229714385535e-05, "loss": 0.8144, "step": 20310 }, { "epoch": 1.77, "learning_rate": 4.1125862520744175e-05, "loss": 1.0061, "step": 20320 }, { "epoch": 1.78, "learning_rate": 4.11214953271028e-05, "loss": 0.9571, "step": 20330 }, { "epoch": 1.78, "learning_rate": 4.111712813346144e-05, "loss": 0.8798, "step": 20340 }, { "epoch": 1.78, "learning_rate": 4.1112760939820075e-05, "loss": 0.804, "step": 20350 }, { "epoch": 1.78, "learning_rate": 4.110839374617871e-05, "loss": 0.8891, "step": 20360 }, { "epoch": 1.78, "learning_rate": 4.110402655253734e-05, "loss": 0.8569, "step": 20370 }, { "epoch": 1.78, "learning_rate": 4.1099659358895974e-05, "loss": 0.9317, "step": 20380 }, { "epoch": 1.78, "learning_rate": 4.1095292165254614e-05, "loss": 0.8127, "step": 20390 }, { "epoch": 1.78, "learning_rate": 4.109092497161324e-05, "loss": 0.8602, "step": 20400 }, { "epoch": 1.78, "learning_rate": 4.108655777797188e-05, "loss": 0.8579, "step": 20410 }, { "epoch": 1.78, "learning_rate": 4.108219058433051e-05, "loss": 0.8826, "step": 20420 }, { "epoch": 1.78, "learning_rate": 4.107782339068915e-05, "loss": 0.8966, "step": 20430 }, { "epoch": 1.79, "learning_rate": 4.107345619704778e-05, "loss": 0.9043, "step": 20440 }, { "epoch": 1.79, "learning_rate": 4.1069089003406414e-05, "loss": 1.0149, "step": 20450 }, { "epoch": 1.79, "learning_rate": 4.106472180976505e-05, "loss": 0.9105, "step": 20460 }, { "epoch": 1.79, "learning_rate": 4.106035461612368e-05, "loss": 0.7942, "step": 20470 }, { "epoch": 1.79, "learning_rate": 4.105598742248232e-05, "loss": 0.8985, "step": 20480 }, { "epoch": 1.79, "learning_rate": 4.1051620228840946e-05, "loss": 0.892, "step": 20490 }, { "epoch": 1.79, "learning_rate": 4.1047253035199586e-05, "loss": 0.8625, "step": 20500 }, { "epoch": 1.79, "learning_rate": 4.104288584155821e-05, "loss": 0.9714, "step": 20510 }, { "epoch": 1.79, "learning_rate": 4.103851864791685e-05, "loss": 1.0135, "step": 20520 }, { "epoch": 1.79, "learning_rate": 4.103415145427548e-05, "loss": 0.8904, "step": 20530 }, { "epoch": 1.79, "learning_rate": 4.102978426063412e-05, "loss": 0.8925, "step": 20540 }, { "epoch": 1.79, "learning_rate": 4.102541706699275e-05, "loss": 0.9535, "step": 20550 }, { "epoch": 1.8, "learning_rate": 4.1021049873351386e-05, "loss": 0.9517, "step": 20560 }, { "epoch": 1.8, "learning_rate": 4.1016682679710026e-05, "loss": 0.9011, "step": 20570 }, { "epoch": 1.8, "learning_rate": 4.101231548606865e-05, "loss": 0.9281, "step": 20580 }, { "epoch": 1.8, "learning_rate": 4.100794829242729e-05, "loss": 0.9168, "step": 20590 }, { "epoch": 1.8, "learning_rate": 4.100358109878592e-05, "loss": 0.911, "step": 20600 }, { "epoch": 1.8, "learning_rate": 4.099921390514456e-05, "loss": 0.891, "step": 20610 }, { "epoch": 1.8, "learning_rate": 4.0994846711503185e-05, "loss": 0.857, "step": 20620 }, { "epoch": 1.8, "learning_rate": 4.0990479517861825e-05, "loss": 0.794, "step": 20630 }, { "epoch": 1.8, "learning_rate": 4.098611232422046e-05, "loss": 0.8943, "step": 20640 }, { "epoch": 1.8, "learning_rate": 4.098174513057909e-05, "loss": 0.839, "step": 20650 }, { "epoch": 1.8, "learning_rate": 4.0977377936937724e-05, "loss": 0.8686, "step": 20660 }, { "epoch": 1.81, "learning_rate": 4.097301074329636e-05, "loss": 0.8671, "step": 20670 }, { "epoch": 1.81, "learning_rate": 4.0968643549655e-05, "loss": 0.8398, "step": 20680 }, { "epoch": 1.81, "learning_rate": 4.0964276356013624e-05, "loss": 0.8708, "step": 20690 }, { "epoch": 1.81, "learning_rate": 4.0959909162372264e-05, "loss": 0.9895, "step": 20700 }, { "epoch": 1.81, "learning_rate": 4.095554196873089e-05, "loss": 0.9162, "step": 20710 }, { "epoch": 1.81, "learning_rate": 4.095117477508953e-05, "loss": 0.8487, "step": 20720 }, { "epoch": 1.81, "learning_rate": 4.0946807581448164e-05, "loss": 0.9237, "step": 20730 }, { "epoch": 1.81, "learning_rate": 4.09424403878068e-05, "loss": 0.8511, "step": 20740 }, { "epoch": 1.81, "learning_rate": 4.093807319416543e-05, "loss": 0.9194, "step": 20750 }, { "epoch": 1.81, "learning_rate": 4.093370600052406e-05, "loss": 0.8543, "step": 20760 }, { "epoch": 1.81, "learning_rate": 4.09293388068827e-05, "loss": 0.8915, "step": 20770 }, { "epoch": 1.82, "learning_rate": 4.092497161324133e-05, "loss": 0.8599, "step": 20780 }, { "epoch": 1.82, "learning_rate": 4.092060441959997e-05, "loss": 0.9343, "step": 20790 }, { "epoch": 1.82, "learning_rate": 4.09162372259586e-05, "loss": 0.8082, "step": 20800 }, { "epoch": 1.82, "learning_rate": 4.0911870032317236e-05, "loss": 0.7932, "step": 20810 }, { "epoch": 1.82, "learning_rate": 4.090750283867587e-05, "loss": 0.9403, "step": 20820 }, { "epoch": 1.82, "learning_rate": 4.09031356450345e-05, "loss": 0.8183, "step": 20830 }, { "epoch": 1.82, "learning_rate": 4.0898768451393136e-05, "loss": 0.8393, "step": 20840 }, { "epoch": 1.82, "learning_rate": 4.089440125775177e-05, "loss": 0.7841, "step": 20850 }, { "epoch": 1.82, "learning_rate": 4.08900340641104e-05, "loss": 0.8817, "step": 20860 }, { "epoch": 1.82, "learning_rate": 4.0885666870469035e-05, "loss": 0.9287, "step": 20870 }, { "epoch": 1.82, "learning_rate": 4.0881299676827675e-05, "loss": 0.7928, "step": 20880 }, { "epoch": 1.82, "learning_rate": 4.087693248318631e-05, "loss": 0.9636, "step": 20890 }, { "epoch": 1.83, "learning_rate": 4.087256528954494e-05, "loss": 0.9103, "step": 20900 }, { "epoch": 1.83, "learning_rate": 4.0868198095903575e-05, "loss": 0.8192, "step": 20910 }, { "epoch": 1.83, "learning_rate": 4.086383090226221e-05, "loss": 0.9612, "step": 20920 }, { "epoch": 1.83, "learning_rate": 4.085946370862084e-05, "loss": 0.8878, "step": 20930 }, { "epoch": 1.83, "learning_rate": 4.0855096514979474e-05, "loss": 0.8935, "step": 20940 }, { "epoch": 1.83, "learning_rate": 4.085072932133811e-05, "loss": 0.9094, "step": 20950 }, { "epoch": 1.83, "learning_rate": 4.084636212769675e-05, "loss": 0.9467, "step": 20960 }, { "epoch": 1.83, "learning_rate": 4.084199493405538e-05, "loss": 0.9798, "step": 20970 }, { "epoch": 1.83, "learning_rate": 4.0837627740414014e-05, "loss": 0.7955, "step": 20980 }, { "epoch": 1.83, "learning_rate": 4.083326054677265e-05, "loss": 0.8728, "step": 20990 }, { "epoch": 1.83, "learning_rate": 4.082889335313128e-05, "loss": 0.9996, "step": 21000 }, { "epoch": 1.84, "learning_rate": 4.0824526159489914e-05, "loss": 0.998, "step": 21010 }, { "epoch": 1.84, "learning_rate": 4.082015896584855e-05, "loss": 0.861, "step": 21020 }, { "epoch": 1.84, "learning_rate": 4.081579177220718e-05, "loss": 0.8528, "step": 21030 }, { "epoch": 1.84, "learning_rate": 4.081142457856581e-05, "loss": 0.8457, "step": 21040 }, { "epoch": 1.84, "learning_rate": 4.080705738492445e-05, "loss": 0.8819, "step": 21050 }, { "epoch": 1.84, "learning_rate": 4.080269019128308e-05, "loss": 0.9683, "step": 21060 }, { "epoch": 1.84, "learning_rate": 4.079832299764172e-05, "loss": 0.957, "step": 21070 }, { "epoch": 1.84, "learning_rate": 4.079395580400035e-05, "loss": 0.9263, "step": 21080 }, { "epoch": 1.84, "learning_rate": 4.0789588610358986e-05, "loss": 0.7987, "step": 21090 }, { "epoch": 1.84, "learning_rate": 4.078522141671762e-05, "loss": 0.9648, "step": 21100 }, { "epoch": 1.84, "learning_rate": 4.078085422307625e-05, "loss": 0.907, "step": 21110 }, { "epoch": 1.84, "learning_rate": 4.0776487029434886e-05, "loss": 0.874, "step": 21120 }, { "epoch": 1.85, "learning_rate": 4.077211983579352e-05, "loss": 0.9533, "step": 21130 }, { "epoch": 1.85, "learning_rate": 4.076775264215216e-05, "loss": 0.973, "step": 21140 }, { "epoch": 1.85, "learning_rate": 4.0763385448510785e-05, "loss": 0.8306, "step": 21150 }, { "epoch": 1.85, "learning_rate": 4.0759018254869425e-05, "loss": 0.7781, "step": 21160 }, { "epoch": 1.85, "learning_rate": 4.075465106122806e-05, "loss": 0.9331, "step": 21170 }, { "epoch": 1.85, "learning_rate": 4.075028386758669e-05, "loss": 0.8894, "step": 21180 }, { "epoch": 1.85, "learning_rate": 4.0745916673945325e-05, "loss": 0.8544, "step": 21190 }, { "epoch": 1.85, "learning_rate": 4.074154948030396e-05, "loss": 0.9052, "step": 21200 }, { "epoch": 1.85, "learning_rate": 4.07371822866626e-05, "loss": 0.7815, "step": 21210 }, { "epoch": 1.85, "learning_rate": 4.0732815093021224e-05, "loss": 0.7964, "step": 21220 }, { "epoch": 1.85, "learning_rate": 4.0728447899379864e-05, "loss": 0.877, "step": 21230 }, { "epoch": 1.86, "learning_rate": 4.072408070573849e-05, "loss": 0.8209, "step": 21240 }, { "epoch": 1.86, "learning_rate": 4.071971351209713e-05, "loss": 0.9611, "step": 21250 }, { "epoch": 1.86, "learning_rate": 4.071534631845576e-05, "loss": 0.8529, "step": 21260 }, { "epoch": 1.86, "learning_rate": 4.07109791248144e-05, "loss": 0.8635, "step": 21270 }, { "epoch": 1.86, "learning_rate": 4.070661193117303e-05, "loss": 0.94, "step": 21280 }, { "epoch": 1.86, "learning_rate": 4.0702244737531664e-05, "loss": 0.9409, "step": 21290 }, { "epoch": 1.86, "learning_rate": 4.0697877543890304e-05, "loss": 0.9844, "step": 21300 }, { "epoch": 1.86, "learning_rate": 4.069351035024893e-05, "loss": 0.9141, "step": 21310 }, { "epoch": 1.86, "learning_rate": 4.068914315660757e-05, "loss": 0.9606, "step": 21320 }, { "epoch": 1.86, "learning_rate": 4.0684775962966196e-05, "loss": 0.8466, "step": 21330 }, { "epoch": 1.86, "learning_rate": 4.0680408769324836e-05, "loss": 0.8608, "step": 21340 }, { "epoch": 1.86, "learning_rate": 4.067604157568346e-05, "loss": 0.9744, "step": 21350 }, { "epoch": 1.87, "learning_rate": 4.06716743820421e-05, "loss": 0.9645, "step": 21360 }, { "epoch": 1.87, "learning_rate": 4.0667307188400736e-05, "loss": 0.8962, "step": 21370 }, { "epoch": 1.87, "learning_rate": 4.066293999475937e-05, "loss": 0.9371, "step": 21380 }, { "epoch": 1.87, "learning_rate": 4.0658572801118e-05, "loss": 0.975, "step": 21390 }, { "epoch": 1.87, "learning_rate": 4.0654205607476636e-05, "loss": 0.8093, "step": 21400 }, { "epoch": 1.87, "learning_rate": 4.0649838413835276e-05, "loss": 0.9707, "step": 21410 }, { "epoch": 1.87, "learning_rate": 4.06454712201939e-05, "loss": 0.8995, "step": 21420 }, { "epoch": 1.87, "learning_rate": 4.064110402655254e-05, "loss": 0.8861, "step": 21430 }, { "epoch": 1.87, "learning_rate": 4.063673683291117e-05, "loss": 0.9399, "step": 21440 }, { "epoch": 1.87, "learning_rate": 4.063236963926981e-05, "loss": 0.9286, "step": 21450 }, { "epoch": 1.87, "learning_rate": 4.062800244562844e-05, "loss": 0.9884, "step": 21460 }, { "epoch": 1.88, "learning_rate": 4.0623635251987075e-05, "loss": 0.9003, "step": 21470 }, { "epoch": 1.88, "learning_rate": 4.061926805834571e-05, "loss": 0.8981, "step": 21480 }, { "epoch": 1.88, "learning_rate": 4.061490086470434e-05, "loss": 0.924, "step": 21490 }, { "epoch": 1.88, "learning_rate": 4.061053367106298e-05, "loss": 0.9664, "step": 21500 }, { "epoch": 1.88, "learning_rate": 4.060616647742161e-05, "loss": 1.0078, "step": 21510 }, { "epoch": 1.88, "learning_rate": 4.060179928378025e-05, "loss": 0.9344, "step": 21520 }, { "epoch": 1.88, "learning_rate": 4.0597432090138874e-05, "loss": 0.9192, "step": 21530 }, { "epoch": 1.88, "learning_rate": 4.0593064896497514e-05, "loss": 0.9847, "step": 21540 }, { "epoch": 1.88, "learning_rate": 4.058869770285615e-05, "loss": 0.9889, "step": 21550 }, { "epoch": 1.88, "learning_rate": 4.058433050921478e-05, "loss": 0.8552, "step": 21560 }, { "epoch": 1.88, "learning_rate": 4.0579963315573414e-05, "loss": 0.8667, "step": 21570 }, { "epoch": 1.88, "learning_rate": 4.057559612193205e-05, "loss": 0.9019, "step": 21580 }, { "epoch": 1.89, "learning_rate": 4.057122892829068e-05, "loss": 0.8803, "step": 21590 }, { "epoch": 1.89, "learning_rate": 4.056686173464931e-05, "loss": 0.9645, "step": 21600 }, { "epoch": 1.89, "learning_rate": 4.056249454100795e-05, "loss": 0.8544, "step": 21610 }, { "epoch": 1.89, "learning_rate": 4.0558127347366586e-05, "loss": 0.8383, "step": 21620 }, { "epoch": 1.89, "learning_rate": 4.055376015372522e-05, "loss": 0.9774, "step": 21630 }, { "epoch": 1.89, "learning_rate": 4.054939296008385e-05, "loss": 0.8571, "step": 21640 }, { "epoch": 1.89, "learning_rate": 4.0545025766442486e-05, "loss": 0.7946, "step": 21650 }, { "epoch": 1.89, "learning_rate": 4.054065857280112e-05, "loss": 0.9535, "step": 21660 }, { "epoch": 1.89, "learning_rate": 4.053629137915975e-05, "loss": 0.9974, "step": 21670 }, { "epoch": 1.89, "learning_rate": 4.0531924185518386e-05, "loss": 0.8984, "step": 21680 }, { "epoch": 1.89, "learning_rate": 4.052755699187702e-05, "loss": 0.9009, "step": 21690 }, { "epoch": 1.9, "learning_rate": 4.052318979823566e-05, "loss": 0.8197, "step": 21700 }, { "epoch": 1.9, "learning_rate": 4.051882260459429e-05, "loss": 0.8758, "step": 21710 }, { "epoch": 1.9, "learning_rate": 4.0514455410952925e-05, "loss": 0.9092, "step": 21720 }, { "epoch": 1.9, "learning_rate": 4.051008821731156e-05, "loss": 0.8336, "step": 21730 }, { "epoch": 1.9, "learning_rate": 4.050572102367019e-05, "loss": 0.9101, "step": 21740 }, { "epoch": 1.9, "learning_rate": 4.0501353830028825e-05, "loss": 1.0601, "step": 21750 }, { "epoch": 1.9, "learning_rate": 4.049698663638746e-05, "loss": 0.9331, "step": 21760 }, { "epoch": 1.9, "learning_rate": 4.049261944274609e-05, "loss": 0.9243, "step": 21770 }, { "epoch": 1.9, "learning_rate": 4.0488252249104724e-05, "loss": 0.7971, "step": 21780 }, { "epoch": 1.9, "learning_rate": 4.048388505546336e-05, "loss": 0.909, "step": 21790 }, { "epoch": 1.9, "learning_rate": 4.0479517861822e-05, "loss": 0.8771, "step": 21800 }, { "epoch": 1.9, "learning_rate": 4.047515066818063e-05, "loss": 0.9831, "step": 21810 }, { "epoch": 1.91, "learning_rate": 4.0470783474539264e-05, "loss": 0.8827, "step": 21820 }, { "epoch": 1.91, "learning_rate": 4.04664162808979e-05, "loss": 0.8779, "step": 21830 }, { "epoch": 1.91, "learning_rate": 4.046204908725653e-05, "loss": 0.7628, "step": 21840 }, { "epoch": 1.91, "learning_rate": 4.0457681893615164e-05, "loss": 0.8307, "step": 21850 }, { "epoch": 1.91, "learning_rate": 4.04533146999738e-05, "loss": 0.9691, "step": 21860 }, { "epoch": 1.91, "learning_rate": 4.044894750633244e-05, "loss": 1.0388, "step": 21870 }, { "epoch": 1.91, "learning_rate": 4.044458031269106e-05, "loss": 0.9314, "step": 21880 }, { "epoch": 1.91, "learning_rate": 4.04402131190497e-05, "loss": 0.9269, "step": 21890 }, { "epoch": 1.91, "learning_rate": 4.0435845925408336e-05, "loss": 0.8417, "step": 21900 }, { "epoch": 1.91, "learning_rate": 4.043147873176697e-05, "loss": 0.8343, "step": 21910 }, { "epoch": 1.91, "learning_rate": 4.04271115381256e-05, "loss": 0.9138, "step": 21920 }, { "epoch": 1.92, "learning_rate": 4.0422744344484236e-05, "loss": 0.8187, "step": 21930 }, { "epoch": 1.92, "learning_rate": 4.041837715084287e-05, "loss": 0.9805, "step": 21940 }, { "epoch": 1.92, "learning_rate": 4.04140099572015e-05, "loss": 0.8165, "step": 21950 }, { "epoch": 1.92, "learning_rate": 4.040964276356014e-05, "loss": 1.0771, "step": 21960 }, { "epoch": 1.92, "learning_rate": 4.040527556991877e-05, "loss": 0.725, "step": 21970 }, { "epoch": 1.92, "learning_rate": 4.040090837627741e-05, "loss": 0.7819, "step": 21980 }, { "epoch": 1.92, "learning_rate": 4.0396541182636035e-05, "loss": 1.0414, "step": 21990 }, { "epoch": 1.92, "learning_rate": 4.0392173988994675e-05, "loss": 0.8359, "step": 22000 }, { "epoch": 1.92, "learning_rate": 4.038780679535331e-05, "loss": 0.8497, "step": 22010 }, { "epoch": 1.92, "learning_rate": 4.038343960171194e-05, "loss": 0.919, "step": 22020 }, { "epoch": 1.92, "learning_rate": 4.037907240807058e-05, "loss": 0.812, "step": 22030 }, { "epoch": 1.93, "learning_rate": 4.037470521442921e-05, "loss": 0.9045, "step": 22040 }, { "epoch": 1.93, "learning_rate": 4.037033802078785e-05, "loss": 0.8461, "step": 22050 }, { "epoch": 1.93, "learning_rate": 4.0365970827146474e-05, "loss": 0.9765, "step": 22060 }, { "epoch": 1.93, "learning_rate": 4.0361603633505114e-05, "loss": 0.9953, "step": 22070 }, { "epoch": 1.93, "learning_rate": 4.035723643986374e-05, "loss": 0.8718, "step": 22080 }, { "epoch": 1.93, "learning_rate": 4.035286924622238e-05, "loss": 0.9663, "step": 22090 }, { "epoch": 1.93, "learning_rate": 4.0348502052581014e-05, "loss": 0.9163, "step": 22100 }, { "epoch": 1.93, "learning_rate": 4.034413485893965e-05, "loss": 0.8368, "step": 22110 }, { "epoch": 1.93, "learning_rate": 4.033976766529828e-05, "loss": 0.8405, "step": 22120 }, { "epoch": 1.93, "learning_rate": 4.0335400471656914e-05, "loss": 0.9603, "step": 22130 }, { "epoch": 1.93, "learning_rate": 4.0331033278015554e-05, "loss": 0.9125, "step": 22140 }, { "epoch": 1.93, "learning_rate": 4.032666608437418e-05, "loss": 0.9466, "step": 22150 }, { "epoch": 1.94, "learning_rate": 4.032229889073282e-05, "loss": 0.8659, "step": 22160 }, { "epoch": 1.94, "learning_rate": 4.0317931697091446e-05, "loss": 0.8547, "step": 22170 }, { "epoch": 1.94, "learning_rate": 4.0313564503450086e-05, "loss": 0.9856, "step": 22180 }, { "epoch": 1.94, "learning_rate": 4.030919730980871e-05, "loss": 0.8859, "step": 22190 }, { "epoch": 1.94, "learning_rate": 4.030483011616735e-05, "loss": 0.7921, "step": 22200 }, { "epoch": 1.94, "learning_rate": 4.0300462922525986e-05, "loss": 0.9071, "step": 22210 }, { "epoch": 1.94, "learning_rate": 4.029609572888462e-05, "loss": 1.0703, "step": 22220 }, { "epoch": 1.94, "learning_rate": 4.029172853524326e-05, "loss": 0.9665, "step": 22230 }, { "epoch": 1.94, "learning_rate": 4.0287361341601886e-05, "loss": 1.0264, "step": 22240 }, { "epoch": 1.94, "learning_rate": 4.0282994147960526e-05, "loss": 0.9011, "step": 22250 }, { "epoch": 1.94, "learning_rate": 4.027862695431915e-05, "loss": 0.8626, "step": 22260 }, { "epoch": 1.95, "learning_rate": 4.027425976067779e-05, "loss": 0.8657, "step": 22270 }, { "epoch": 1.95, "learning_rate": 4.0269892567036425e-05, "loss": 0.712, "step": 22280 }, { "epoch": 1.95, "learning_rate": 4.026552537339506e-05, "loss": 0.9101, "step": 22290 }, { "epoch": 1.95, "learning_rate": 4.026115817975369e-05, "loss": 0.9167, "step": 22300 }, { "epoch": 1.95, "learning_rate": 4.0256790986112325e-05, "loss": 0.9731, "step": 22310 }, { "epoch": 1.95, "learning_rate": 4.025242379247096e-05, "loss": 0.8731, "step": 22320 }, { "epoch": 1.95, "learning_rate": 4.024805659882959e-05, "loss": 0.8458, "step": 22330 }, { "epoch": 1.95, "learning_rate": 4.024368940518823e-05, "loss": 0.9359, "step": 22340 }, { "epoch": 1.95, "learning_rate": 4.023932221154686e-05, "loss": 0.8079, "step": 22350 }, { "epoch": 1.95, "learning_rate": 4.02349550179055e-05, "loss": 0.9221, "step": 22360 }, { "epoch": 1.95, "learning_rate": 4.023058782426413e-05, "loss": 0.8865, "step": 22370 }, { "epoch": 1.95, "learning_rate": 4.0226220630622764e-05, "loss": 0.827, "step": 22380 }, { "epoch": 1.96, "learning_rate": 4.02218534369814e-05, "loss": 0.8529, "step": 22390 }, { "epoch": 1.96, "learning_rate": 4.021748624334003e-05, "loss": 0.8286, "step": 22400 }, { "epoch": 1.96, "learning_rate": 4.0213119049698664e-05, "loss": 0.9091, "step": 22410 }, { "epoch": 1.96, "learning_rate": 4.02087518560573e-05, "loss": 0.8375, "step": 22420 }, { "epoch": 1.96, "learning_rate": 4.020438466241594e-05, "loss": 0.8778, "step": 22430 }, { "epoch": 1.96, "learning_rate": 4.020001746877457e-05, "loss": 0.916, "step": 22440 }, { "epoch": 1.96, "learning_rate": 4.01956502751332e-05, "loss": 0.7706, "step": 22450 }, { "epoch": 1.96, "learning_rate": 4.0191283081491836e-05, "loss": 1.0034, "step": 22460 }, { "epoch": 1.96, "learning_rate": 4.018691588785047e-05, "loss": 0.9329, "step": 22470 }, { "epoch": 1.96, "learning_rate": 4.01825486942091e-05, "loss": 0.9582, "step": 22480 }, { "epoch": 1.96, "learning_rate": 4.0178181500567736e-05, "loss": 0.8396, "step": 22490 }, { "epoch": 1.97, "learning_rate": 4.017381430692637e-05, "loss": 0.9684, "step": 22500 }, { "epoch": 1.97, "learning_rate": 4.0169447113285e-05, "loss": 0.8473, "step": 22510 }, { "epoch": 1.97, "learning_rate": 4.0165079919643636e-05, "loss": 0.8606, "step": 22520 }, { "epoch": 1.97, "learning_rate": 4.0160712726002276e-05, "loss": 0.8702, "step": 22530 }, { "epoch": 1.97, "learning_rate": 4.015634553236091e-05, "loss": 0.9901, "step": 22540 }, { "epoch": 1.97, "learning_rate": 4.015197833871954e-05, "loss": 0.8046, "step": 22550 }, { "epoch": 1.97, "learning_rate": 4.0147611145078175e-05, "loss": 0.8869, "step": 22560 }, { "epoch": 1.97, "learning_rate": 4.014324395143681e-05, "loss": 0.9871, "step": 22570 }, { "epoch": 1.97, "learning_rate": 4.013887675779544e-05, "loss": 1.0331, "step": 22580 }, { "epoch": 1.97, "learning_rate": 4.0134509564154075e-05, "loss": 0.8722, "step": 22590 }, { "epoch": 1.97, "learning_rate": 4.013014237051271e-05, "loss": 0.9293, "step": 22600 }, { "epoch": 1.97, "learning_rate": 4.012577517687134e-05, "loss": 0.9316, "step": 22610 }, { "epoch": 1.98, "learning_rate": 4.012140798322998e-05, "loss": 0.8034, "step": 22620 }, { "epoch": 1.98, "learning_rate": 4.0117040789588614e-05, "loss": 0.8186, "step": 22630 }, { "epoch": 1.98, "learning_rate": 4.011267359594725e-05, "loss": 0.8013, "step": 22640 }, { "epoch": 1.98, "learning_rate": 4.010830640230588e-05, "loss": 0.9372, "step": 22650 }, { "epoch": 1.98, "learning_rate": 4.0103939208664514e-05, "loss": 0.9258, "step": 22660 }, { "epoch": 1.98, "learning_rate": 4.009957201502315e-05, "loss": 0.9477, "step": 22670 }, { "epoch": 1.98, "learning_rate": 4.009520482138178e-05, "loss": 0.8514, "step": 22680 }, { "epoch": 1.98, "learning_rate": 4.009083762774042e-05, "loss": 0.9309, "step": 22690 }, { "epoch": 1.98, "learning_rate": 4.008647043409905e-05, "loss": 0.8842, "step": 22700 }, { "epoch": 1.98, "learning_rate": 4.008210324045769e-05, "loss": 0.9121, "step": 22710 }, { "epoch": 1.98, "learning_rate": 4.007773604681631e-05, "loss": 0.8357, "step": 22720 }, { "epoch": 1.99, "learning_rate": 4.007336885317495e-05, "loss": 0.957, "step": 22730 }, { "epoch": 1.99, "learning_rate": 4.0069001659533586e-05, "loss": 0.9514, "step": 22740 }, { "epoch": 1.99, "learning_rate": 4.006463446589222e-05, "loss": 0.9317, "step": 22750 }, { "epoch": 1.99, "learning_rate": 4.006026727225085e-05, "loss": 1.0788, "step": 22760 }, { "epoch": 1.99, "learning_rate": 4.0055900078609486e-05, "loss": 0.7874, "step": 22770 }, { "epoch": 1.99, "learning_rate": 4.0051532884968126e-05, "loss": 0.8283, "step": 22780 }, { "epoch": 1.99, "learning_rate": 4.004716569132675e-05, "loss": 0.8525, "step": 22790 }, { "epoch": 1.99, "learning_rate": 4.004279849768539e-05, "loss": 0.8768, "step": 22800 }, { "epoch": 1.99, "learning_rate": 4.003843130404402e-05, "loss": 0.8875, "step": 22810 }, { "epoch": 1.99, "learning_rate": 4.003406411040266e-05, "loss": 0.9899, "step": 22820 }, { "epoch": 1.99, "learning_rate": 4.002969691676129e-05, "loss": 0.9096, "step": 22830 }, { "epoch": 1.99, "learning_rate": 4.0025329723119925e-05, "loss": 0.9522, "step": 22840 }, { "epoch": 2.0, "learning_rate": 4.002096252947856e-05, "loss": 0.8351, "step": 22850 }, { "epoch": 2.0, "learning_rate": 4.001659533583719e-05, "loss": 0.967, "step": 22860 }, { "epoch": 2.0, "learning_rate": 4.001222814219583e-05, "loss": 0.9432, "step": 22870 }, { "epoch": 2.0, "learning_rate": 4.000786094855446e-05, "loss": 0.8261, "step": 22880 }, { "epoch": 2.0, "learning_rate": 4.00034937549131e-05, "loss": 0.9316, "step": 22890 }, { "epoch": 2.0, "learning_rate": 3.9999126561271724e-05, "loss": 0.914, "step": 22900 }, { "epoch": 2.0, "learning_rate": 3.9994759367630364e-05, "loss": 0.7597, "step": 22910 }, { "epoch": 2.0, "learning_rate": 3.999039217398899e-05, "loss": 0.9356, "step": 22920 }, { "epoch": 2.0, "learning_rate": 3.998602498034763e-05, "loss": 1.0288, "step": 22930 }, { "epoch": 2.0, "learning_rate": 3.9981657786706264e-05, "loss": 0.9847, "step": 22940 }, { "epoch": 2.0, "learning_rate": 3.99772905930649e-05, "loss": 0.9289, "step": 22950 }, { "epoch": 2.01, "learning_rate": 3.997292339942354e-05, "loss": 0.8002, "step": 22960 }, { "epoch": 2.01, "learning_rate": 3.9968556205782164e-05, "loss": 0.8894, "step": 22970 }, { "epoch": 2.01, "learning_rate": 3.9964189012140804e-05, "loss": 0.8822, "step": 22980 }, { "epoch": 2.01, "learning_rate": 3.995982181849943e-05, "loss": 1.0376, "step": 22990 }, { "epoch": 2.01, "learning_rate": 3.995545462485807e-05, "loss": 0.7726, "step": 23000 }, { "epoch": 2.01, "learning_rate": 3.9951087431216697e-05, "loss": 0.8633, "step": 23010 }, { "epoch": 2.01, "learning_rate": 3.9946720237575336e-05, "loss": 0.9781, "step": 23020 }, { "epoch": 2.01, "learning_rate": 3.994235304393397e-05, "loss": 0.9212, "step": 23030 }, { "epoch": 2.01, "learning_rate": 3.99379858502926e-05, "loss": 0.8477, "step": 23040 }, { "epoch": 2.01, "learning_rate": 3.9933618656651236e-05, "loss": 0.9468, "step": 23050 }, { "epoch": 2.01, "learning_rate": 3.992925146300987e-05, "loss": 0.8713, "step": 23060 }, { "epoch": 2.02, "learning_rate": 3.992488426936851e-05, "loss": 0.9488, "step": 23070 }, { "epoch": 2.02, "learning_rate": 3.9920517075727136e-05, "loss": 0.889, "step": 23080 }, { "epoch": 2.02, "learning_rate": 3.9916149882085776e-05, "loss": 0.9408, "step": 23090 }, { "epoch": 2.02, "learning_rate": 3.991178268844441e-05, "loss": 0.8618, "step": 23100 }, { "epoch": 2.02, "learning_rate": 3.990741549480304e-05, "loss": 0.8909, "step": 23110 }, { "epoch": 2.02, "learning_rate": 3.9903048301161675e-05, "loss": 0.773, "step": 23120 }, { "epoch": 2.02, "learning_rate": 3.989868110752031e-05, "loss": 1.0007, "step": 23130 }, { "epoch": 2.02, "learning_rate": 3.989431391387894e-05, "loss": 0.852, "step": 23140 }, { "epoch": 2.02, "learning_rate": 3.9889946720237575e-05, "loss": 0.9066, "step": 23150 }, { "epoch": 2.02, "learning_rate": 3.9885579526596215e-05, "loss": 0.8275, "step": 23160 }, { "epoch": 2.02, "learning_rate": 3.988121233295484e-05, "loss": 0.8664, "step": 23170 }, { "epoch": 2.02, "learning_rate": 3.987684513931348e-05, "loss": 0.7777, "step": 23180 }, { "epoch": 2.03, "learning_rate": 3.9872477945672114e-05, "loss": 0.9574, "step": 23190 }, { "epoch": 2.03, "learning_rate": 3.986811075203075e-05, "loss": 0.8532, "step": 23200 }, { "epoch": 2.03, "learning_rate": 3.986374355838938e-05, "loss": 0.8032, "step": 23210 }, { "epoch": 2.03, "learning_rate": 3.9859376364748014e-05, "loss": 0.985, "step": 23220 }, { "epoch": 2.03, "learning_rate": 3.985500917110665e-05, "loss": 0.9106, "step": 23230 }, { "epoch": 2.03, "learning_rate": 3.985064197746528e-05, "loss": 0.8986, "step": 23240 }, { "epoch": 2.03, "learning_rate": 3.9846274783823914e-05, "loss": 0.9993, "step": 23250 }, { "epoch": 2.03, "learning_rate": 3.9841907590182554e-05, "loss": 0.8737, "step": 23260 }, { "epoch": 2.03, "learning_rate": 3.983754039654119e-05, "loss": 0.9188, "step": 23270 }, { "epoch": 2.03, "learning_rate": 3.983317320289982e-05, "loss": 0.823, "step": 23280 }, { "epoch": 2.03, "learning_rate": 3.982880600925845e-05, "loss": 0.9166, "step": 23290 }, { "epoch": 2.04, "learning_rate": 3.9824438815617086e-05, "loss": 0.897, "step": 23300 }, { "epoch": 2.04, "learning_rate": 3.982007162197572e-05, "loss": 0.8725, "step": 23310 }, { "epoch": 2.04, "learning_rate": 3.981570442833435e-05, "loss": 0.9018, "step": 23320 }, { "epoch": 2.04, "learning_rate": 3.9811337234692986e-05, "loss": 0.8738, "step": 23330 }, { "epoch": 2.04, "learning_rate": 3.980697004105162e-05, "loss": 0.9419, "step": 23340 }, { "epoch": 2.04, "learning_rate": 3.980260284741026e-05, "loss": 0.8153, "step": 23350 }, { "epoch": 2.04, "learning_rate": 3.979823565376889e-05, "loss": 0.8514, "step": 23360 }, { "epoch": 2.04, "learning_rate": 3.9793868460127526e-05, "loss": 0.909, "step": 23370 }, { "epoch": 2.04, "learning_rate": 3.978950126648616e-05, "loss": 0.998, "step": 23380 }, { "epoch": 2.04, "learning_rate": 3.978513407284479e-05, "loss": 0.8643, "step": 23390 }, { "epoch": 2.04, "learning_rate": 3.9780766879203425e-05, "loss": 0.919, "step": 23400 }, { "epoch": 2.04, "learning_rate": 3.977639968556206e-05, "loss": 0.9264, "step": 23410 }, { "epoch": 2.05, "learning_rate": 3.977203249192069e-05, "loss": 0.88, "step": 23420 }, { "epoch": 2.05, "learning_rate": 3.9767665298279325e-05, "loss": 0.9223, "step": 23430 }, { "epoch": 2.05, "learning_rate": 3.9763298104637965e-05, "loss": 0.8655, "step": 23440 }, { "epoch": 2.05, "learning_rate": 3.975893091099659e-05, "loss": 0.8948, "step": 23450 }, { "epoch": 2.05, "learning_rate": 3.975456371735523e-05, "loss": 0.899, "step": 23460 }, { "epoch": 2.05, "learning_rate": 3.9750196523713864e-05, "loss": 0.8429, "step": 23470 }, { "epoch": 2.05, "learning_rate": 3.97458293300725e-05, "loss": 0.7953, "step": 23480 }, { "epoch": 2.05, "learning_rate": 3.974146213643113e-05, "loss": 0.8256, "step": 23490 }, { "epoch": 2.05, "learning_rate": 3.9737094942789764e-05, "loss": 0.7756, "step": 23500 }, { "epoch": 2.05, "learning_rate": 3.9732727749148404e-05, "loss": 0.958, "step": 23510 }, { "epoch": 2.05, "learning_rate": 3.972836055550703e-05, "loss": 0.93, "step": 23520 }, { "epoch": 2.06, "learning_rate": 3.972399336186567e-05, "loss": 0.865, "step": 23530 }, { "epoch": 2.06, "learning_rate": 3.97196261682243e-05, "loss": 0.8069, "step": 23540 }, { "epoch": 2.06, "learning_rate": 3.971525897458294e-05, "loss": 0.9169, "step": 23550 }, { "epoch": 2.06, "learning_rate": 3.971089178094157e-05, "loss": 0.8527, "step": 23560 }, { "epoch": 2.06, "learning_rate": 3.97065245873002e-05, "loss": 0.9844, "step": 23570 }, { "epoch": 2.06, "learning_rate": 3.9702157393658837e-05, "loss": 0.8899, "step": 23580 }, { "epoch": 2.06, "learning_rate": 3.969779020001747e-05, "loss": 0.8415, "step": 23590 }, { "epoch": 2.06, "learning_rate": 3.969342300637611e-05, "loss": 0.8295, "step": 23600 }, { "epoch": 2.06, "learning_rate": 3.9689055812734736e-05, "loss": 0.939, "step": 23610 }, { "epoch": 2.06, "learning_rate": 3.9684688619093376e-05, "loss": 0.8631, "step": 23620 }, { "epoch": 2.06, "learning_rate": 3.9680321425452e-05, "loss": 0.8957, "step": 23630 }, { "epoch": 2.06, "learning_rate": 3.967595423181064e-05, "loss": 0.8927, "step": 23640 }, { "epoch": 2.07, "learning_rate": 3.967158703816927e-05, "loss": 0.8832, "step": 23650 }, { "epoch": 2.07, "learning_rate": 3.966721984452791e-05, "loss": 0.8359, "step": 23660 }, { "epoch": 2.07, "learning_rate": 3.966285265088654e-05, "loss": 0.9299, "step": 23670 }, { "epoch": 2.07, "learning_rate": 3.9658485457245175e-05, "loss": 0.8928, "step": 23680 }, { "epoch": 2.07, "learning_rate": 3.9654118263603815e-05, "loss": 1.0467, "step": 23690 }, { "epoch": 2.07, "learning_rate": 3.964975106996244e-05, "loss": 0.9758, "step": 23700 }, { "epoch": 2.07, "learning_rate": 3.964538387632108e-05, "loss": 0.9251, "step": 23710 }, { "epoch": 2.07, "learning_rate": 3.964101668267971e-05, "loss": 0.8069, "step": 23720 }, { "epoch": 2.07, "learning_rate": 3.963664948903835e-05, "loss": 0.9514, "step": 23730 }, { "epoch": 2.07, "learning_rate": 3.9632282295396975e-05, "loss": 0.8373, "step": 23740 }, { "epoch": 2.07, "learning_rate": 3.9627915101755615e-05, "loss": 0.8144, "step": 23750 }, { "epoch": 2.08, "learning_rate": 3.962354790811425e-05, "loss": 0.917, "step": 23760 }, { "epoch": 2.08, "learning_rate": 3.961918071447288e-05, "loss": 0.9509, "step": 23770 }, { "epoch": 2.08, "learning_rate": 3.9614813520831514e-05, "loss": 0.8924, "step": 23780 }, { "epoch": 2.08, "learning_rate": 3.961044632719015e-05, "loss": 0.9203, "step": 23790 }, { "epoch": 2.08, "learning_rate": 3.960607913354879e-05, "loss": 0.9308, "step": 23800 }, { "epoch": 2.08, "learning_rate": 3.9601711939907414e-05, "loss": 0.8257, "step": 23810 }, { "epoch": 2.08, "learning_rate": 3.9597344746266054e-05, "loss": 0.9576, "step": 23820 }, { "epoch": 2.08, "learning_rate": 3.959297755262468e-05, "loss": 0.938, "step": 23830 }, { "epoch": 2.08, "learning_rate": 3.958861035898332e-05, "loss": 0.9299, "step": 23840 }, { "epoch": 2.08, "learning_rate": 3.958424316534195e-05, "loss": 0.8273, "step": 23850 }, { "epoch": 2.08, "learning_rate": 3.9579875971700587e-05, "loss": 1.036, "step": 23860 }, { "epoch": 2.08, "learning_rate": 3.957550877805922e-05, "loss": 0.8539, "step": 23870 }, { "epoch": 2.09, "learning_rate": 3.957114158441785e-05, "loss": 0.8875, "step": 23880 }, { "epoch": 2.09, "learning_rate": 3.956677439077649e-05, "loss": 0.879, "step": 23890 }, { "epoch": 2.09, "learning_rate": 3.956240719713512e-05, "loss": 0.9686, "step": 23900 }, { "epoch": 2.09, "learning_rate": 3.955804000349376e-05, "loss": 0.955, "step": 23910 }, { "epoch": 2.09, "learning_rate": 3.955367280985239e-05, "loss": 0.9249, "step": 23920 }, { "epoch": 2.09, "learning_rate": 3.9549305616211026e-05, "loss": 0.905, "step": 23930 }, { "epoch": 2.09, "learning_rate": 3.954493842256966e-05, "loss": 0.9022, "step": 23940 }, { "epoch": 2.09, "learning_rate": 3.954057122892829e-05, "loss": 0.9259, "step": 23950 }, { "epoch": 2.09, "learning_rate": 3.9536204035286925e-05, "loss": 0.9778, "step": 23960 }, { "epoch": 2.09, "learning_rate": 3.953183684164556e-05, "loss": 0.7921, "step": 23970 }, { "epoch": 2.09, "learning_rate": 3.952746964800419e-05, "loss": 0.8971, "step": 23980 }, { "epoch": 2.1, "learning_rate": 3.9523102454362825e-05, "loss": 0.9093, "step": 23990 }, { "epoch": 2.1, "learning_rate": 3.9518735260721465e-05, "loss": 0.9566, "step": 24000 }, { "epoch": 2.1, "learning_rate": 3.95143680670801e-05, "loss": 0.8507, "step": 24010 }, { "epoch": 2.1, "learning_rate": 3.951000087343873e-05, "loss": 0.7779, "step": 24020 }, { "epoch": 2.1, "learning_rate": 3.9505633679797365e-05, "loss": 0.8863, "step": 24030 }, { "epoch": 2.1, "learning_rate": 3.9501266486156e-05, "loss": 0.931, "step": 24040 }, { "epoch": 2.1, "learning_rate": 3.949689929251463e-05, "loss": 0.9493, "step": 24050 }, { "epoch": 2.1, "learning_rate": 3.9492532098873264e-05, "loss": 0.8711, "step": 24060 }, { "epoch": 2.1, "learning_rate": 3.94881649052319e-05, "loss": 1.0326, "step": 24070 }, { "epoch": 2.1, "learning_rate": 3.948379771159053e-05, "loss": 0.8192, "step": 24080 }, { "epoch": 2.1, "learning_rate": 3.947943051794917e-05, "loss": 0.888, "step": 24090 }, { "epoch": 2.1, "learning_rate": 3.9475063324307804e-05, "loss": 0.8611, "step": 24100 }, { "epoch": 2.11, "learning_rate": 3.947069613066644e-05, "loss": 0.9298, "step": 24110 }, { "epoch": 2.11, "learning_rate": 3.946632893702507e-05, "loss": 0.9856, "step": 24120 }, { "epoch": 2.11, "learning_rate": 3.94619617433837e-05, "loss": 0.8074, "step": 24130 }, { "epoch": 2.11, "learning_rate": 3.9457594549742337e-05, "loss": 0.8909, "step": 24140 }, { "epoch": 2.11, "learning_rate": 3.945322735610097e-05, "loss": 0.9486, "step": 24150 }, { "epoch": 2.11, "learning_rate": 3.94488601624596e-05, "loss": 0.8203, "step": 24160 }, { "epoch": 2.11, "learning_rate": 3.944449296881824e-05, "loss": 0.7625, "step": 24170 }, { "epoch": 2.11, "learning_rate": 3.944012577517687e-05, "loss": 0.8786, "step": 24180 }, { "epoch": 2.11, "learning_rate": 3.943575858153551e-05, "loss": 0.9183, "step": 24190 }, { "epoch": 2.11, "learning_rate": 3.943139138789414e-05, "loss": 0.8982, "step": 24200 }, { "epoch": 2.11, "learning_rate": 3.9427024194252776e-05, "loss": 0.8998, "step": 24210 }, { "epoch": 2.12, "learning_rate": 3.942265700061141e-05, "loss": 1.0057, "step": 24220 }, { "epoch": 2.12, "learning_rate": 3.941828980697004e-05, "loss": 0.9163, "step": 24230 }, { "epoch": 2.12, "learning_rate": 3.9413922613328675e-05, "loss": 0.9484, "step": 24240 }, { "epoch": 2.12, "learning_rate": 3.940955541968731e-05, "loss": 0.838, "step": 24250 }, { "epoch": 2.12, "learning_rate": 3.940518822604595e-05, "loss": 0.8994, "step": 24260 }, { "epoch": 2.12, "learning_rate": 3.9400821032404575e-05, "loss": 0.8689, "step": 24270 }, { "epoch": 2.12, "learning_rate": 3.9396453838763215e-05, "loss": 0.9182, "step": 24280 }, { "epoch": 2.12, "learning_rate": 3.939208664512185e-05, "loss": 0.9549, "step": 24290 }, { "epoch": 2.12, "learning_rate": 3.938771945148048e-05, "loss": 0.8033, "step": 24300 }, { "epoch": 2.12, "learning_rate": 3.9383352257839115e-05, "loss": 0.7709, "step": 24310 }, { "epoch": 2.12, "learning_rate": 3.937898506419775e-05, "loss": 0.8909, "step": 24320 }, { "epoch": 2.13, "learning_rate": 3.937461787055639e-05, "loss": 0.8917, "step": 24330 }, { "epoch": 2.13, "learning_rate": 3.9370250676915014e-05, "loss": 0.8926, "step": 24340 }, { "epoch": 2.13, "learning_rate": 3.9365883483273654e-05, "loss": 0.8432, "step": 24350 }, { "epoch": 2.13, "learning_rate": 3.936151628963228e-05, "loss": 0.8272, "step": 24360 }, { "epoch": 2.13, "learning_rate": 3.935714909599092e-05, "loss": 0.9043, "step": 24370 }, { "epoch": 2.13, "learning_rate": 3.935278190234955e-05, "loss": 0.7532, "step": 24380 }, { "epoch": 2.13, "learning_rate": 3.934841470870819e-05, "loss": 0.8199, "step": 24390 }, { "epoch": 2.13, "learning_rate": 3.934404751506682e-05, "loss": 1.0203, "step": 24400 }, { "epoch": 2.13, "learning_rate": 3.933968032142545e-05, "loss": 0.8517, "step": 24410 }, { "epoch": 2.13, "learning_rate": 3.933531312778409e-05, "loss": 1.021, "step": 24420 }, { "epoch": 2.13, "learning_rate": 3.933094593414272e-05, "loss": 0.8812, "step": 24430 }, { "epoch": 2.13, "learning_rate": 3.932657874050136e-05, "loss": 0.9152, "step": 24440 }, { "epoch": 2.14, "learning_rate": 3.9322211546859986e-05, "loss": 0.9862, "step": 24450 }, { "epoch": 2.14, "learning_rate": 3.9317844353218626e-05, "loss": 1.0007, "step": 24460 }, { "epoch": 2.14, "learning_rate": 3.931347715957725e-05, "loss": 0.9025, "step": 24470 }, { "epoch": 2.14, "learning_rate": 3.930910996593589e-05, "loss": 0.8218, "step": 24480 }, { "epoch": 2.14, "learning_rate": 3.9304742772294526e-05, "loss": 1.0014, "step": 24490 }, { "epoch": 2.14, "learning_rate": 3.930037557865316e-05, "loss": 0.9155, "step": 24500 }, { "epoch": 2.14, "learning_rate": 3.929600838501179e-05, "loss": 0.9372, "step": 24510 }, { "epoch": 2.14, "learning_rate": 3.9291641191370425e-05, "loss": 0.8334, "step": 24520 }, { "epoch": 2.14, "learning_rate": 3.9287273997729065e-05, "loss": 0.8021, "step": 24530 }, { "epoch": 2.14, "learning_rate": 3.928290680408769e-05, "loss": 0.96, "step": 24540 }, { "epoch": 2.14, "learning_rate": 3.927853961044633e-05, "loss": 0.8882, "step": 24550 }, { "epoch": 2.15, "learning_rate": 3.927417241680496e-05, "loss": 0.7146, "step": 24560 }, { "epoch": 2.15, "learning_rate": 3.92698052231636e-05, "loss": 0.9438, "step": 24570 }, { "epoch": 2.15, "learning_rate": 3.926543802952223e-05, "loss": 0.8961, "step": 24580 }, { "epoch": 2.15, "learning_rate": 3.9261070835880865e-05, "loss": 0.8391, "step": 24590 }, { "epoch": 2.15, "learning_rate": 3.92567036422395e-05, "loss": 0.8944, "step": 24600 }, { "epoch": 2.15, "learning_rate": 3.925233644859813e-05, "loss": 0.832, "step": 24610 }, { "epoch": 2.15, "learning_rate": 3.924796925495677e-05, "loss": 0.9138, "step": 24620 }, { "epoch": 2.15, "learning_rate": 3.92436020613154e-05, "loss": 0.8235, "step": 24630 }, { "epoch": 2.15, "learning_rate": 3.923923486767404e-05, "loss": 0.848, "step": 24640 }, { "epoch": 2.15, "learning_rate": 3.9234867674032664e-05, "loss": 0.8115, "step": 24650 }, { "epoch": 2.15, "learning_rate": 3.9230500480391304e-05, "loss": 1.0193, "step": 24660 }, { "epoch": 2.15, "learning_rate": 3.922613328674994e-05, "loss": 0.9207, "step": 24670 }, { "epoch": 2.16, "learning_rate": 3.922176609310857e-05, "loss": 0.9943, "step": 24680 }, { "epoch": 2.16, "learning_rate": 3.92173988994672e-05, "loss": 0.9637, "step": 24690 }, { "epoch": 2.16, "learning_rate": 3.9213031705825837e-05, "loss": 0.8413, "step": 24700 }, { "epoch": 2.16, "learning_rate": 3.920866451218447e-05, "loss": 0.9529, "step": 24710 }, { "epoch": 2.16, "learning_rate": 3.92042973185431e-05, "loss": 1.0203, "step": 24720 }, { "epoch": 2.16, "learning_rate": 3.919993012490174e-05, "loss": 0.8928, "step": 24730 }, { "epoch": 2.16, "learning_rate": 3.9195562931260376e-05, "loss": 0.8102, "step": 24740 }, { "epoch": 2.16, "learning_rate": 3.919119573761901e-05, "loss": 0.8577, "step": 24750 }, { "epoch": 2.16, "learning_rate": 3.918682854397764e-05, "loss": 0.8211, "step": 24760 }, { "epoch": 2.16, "learning_rate": 3.9182461350336276e-05, "loss": 0.8001, "step": 24770 }, { "epoch": 2.16, "learning_rate": 3.917809415669491e-05, "loss": 1.0066, "step": 24780 }, { "epoch": 2.17, "learning_rate": 3.917372696305354e-05, "loss": 0.8596, "step": 24790 }, { "epoch": 2.17, "learning_rate": 3.9169359769412175e-05, "loss": 0.9382, "step": 24800 }, { "epoch": 2.17, "learning_rate": 3.916499257577081e-05, "loss": 0.872, "step": 24810 }, { "epoch": 2.17, "learning_rate": 3.916062538212945e-05, "loss": 0.7562, "step": 24820 }, { "epoch": 2.17, "learning_rate": 3.915625818848808e-05, "loss": 0.7974, "step": 24830 }, { "epoch": 2.17, "learning_rate": 3.9151890994846715e-05, "loss": 0.8388, "step": 24840 }, { "epoch": 2.17, "learning_rate": 3.914752380120535e-05, "loss": 0.8599, "step": 24850 }, { "epoch": 2.17, "learning_rate": 3.914315660756398e-05, "loss": 0.8674, "step": 24860 }, { "epoch": 2.17, "learning_rate": 3.9138789413922615e-05, "loss": 0.8596, "step": 24870 }, { "epoch": 2.17, "learning_rate": 3.913442222028125e-05, "loss": 0.9782, "step": 24880 }, { "epoch": 2.17, "learning_rate": 3.913005502663988e-05, "loss": 0.8244, "step": 24890 }, { "epoch": 2.17, "learning_rate": 3.9125687832998514e-05, "loss": 0.903, "step": 24900 }, { "epoch": 2.18, "learning_rate": 3.912132063935715e-05, "loss": 0.9247, "step": 24910 }, { "epoch": 2.18, "learning_rate": 3.911695344571579e-05, "loss": 0.8638, "step": 24920 }, { "epoch": 2.18, "learning_rate": 3.911258625207442e-05, "loss": 0.8429, "step": 24930 }, { "epoch": 2.18, "learning_rate": 3.9108219058433054e-05, "loss": 0.8551, "step": 24940 }, { "epoch": 2.18, "learning_rate": 3.910385186479169e-05, "loss": 0.8449, "step": 24950 }, { "epoch": 2.18, "learning_rate": 3.909948467115032e-05, "loss": 0.7967, "step": 24960 }, { "epoch": 2.18, "learning_rate": 3.909511747750895e-05, "loss": 0.9304, "step": 24970 }, { "epoch": 2.18, "learning_rate": 3.9090750283867587e-05, "loss": 0.9562, "step": 24980 }, { "epoch": 2.18, "learning_rate": 3.9086383090226227e-05, "loss": 0.9337, "step": 24990 }, { "epoch": 2.18, "learning_rate": 3.908201589658485e-05, "loss": 1.0566, "step": 25000 }, { "epoch": 2.18, "eval_accuracy": 0.5722708067210376, "eval_loss": 0.893390417098999, "eval_runtime": 84.0906, "eval_samples_per_second": 121.024, "eval_steps_per_second": 15.138, "step": 25000 }, { "epoch": 2.18, "learning_rate": 3.907764870294349e-05, "loss": 0.9265, "step": 25010 }, { "epoch": 2.19, "learning_rate": 3.9073281509302126e-05, "loss": 0.854, "step": 25020 }, { "epoch": 2.19, "learning_rate": 3.906891431566076e-05, "loss": 0.8346, "step": 25030 }, { "epoch": 2.19, "learning_rate": 3.906454712201939e-05, "loss": 0.9734, "step": 25040 }, { "epoch": 2.19, "learning_rate": 3.9060179928378026e-05, "loss": 0.8484, "step": 25050 }, { "epoch": 2.19, "learning_rate": 3.905581273473666e-05, "loss": 0.8578, "step": 25060 }, { "epoch": 2.19, "learning_rate": 3.905144554109529e-05, "loss": 0.9587, "step": 25070 }, { "epoch": 2.19, "learning_rate": 3.904707834745393e-05, "loss": 0.8706, "step": 25080 }, { "epoch": 2.19, "learning_rate": 3.904271115381256e-05, "loss": 0.8656, "step": 25090 }, { "epoch": 2.19, "learning_rate": 3.90383439601712e-05, "loss": 0.9157, "step": 25100 }, { "epoch": 2.19, "learning_rate": 3.903397676652983e-05, "loss": 0.8974, "step": 25110 }, { "epoch": 2.19, "learning_rate": 3.9029609572888465e-05, "loss": 0.9538, "step": 25120 }, { "epoch": 2.19, "learning_rate": 3.90252423792471e-05, "loss": 0.8911, "step": 25130 }, { "epoch": 2.2, "learning_rate": 3.902087518560573e-05, "loss": 0.9247, "step": 25140 }, { "epoch": 2.2, "learning_rate": 3.901650799196437e-05, "loss": 0.9344, "step": 25150 }, { "epoch": 2.2, "learning_rate": 3.9012140798323e-05, "loss": 0.8821, "step": 25160 }, { "epoch": 2.2, "learning_rate": 3.900777360468164e-05, "loss": 0.8694, "step": 25170 }, { "epoch": 2.2, "learning_rate": 3.9003406411040264e-05, "loss": 0.7757, "step": 25180 }, { "epoch": 2.2, "learning_rate": 3.8999039217398904e-05, "loss": 0.9059, "step": 25190 }, { "epoch": 2.2, "learning_rate": 3.899467202375753e-05, "loss": 0.9391, "step": 25200 }, { "epoch": 2.2, "learning_rate": 3.899030483011617e-05, "loss": 0.9832, "step": 25210 }, { "epoch": 2.2, "learning_rate": 3.8985937636474804e-05, "loss": 0.876, "step": 25220 }, { "epoch": 2.2, "learning_rate": 3.898157044283344e-05, "loss": 0.907, "step": 25230 }, { "epoch": 2.2, "learning_rate": 3.897720324919207e-05, "loss": 0.8353, "step": 25240 }, { "epoch": 2.21, "learning_rate": 3.89728360555507e-05, "loss": 0.9534, "step": 25250 }, { "epoch": 2.21, "learning_rate": 3.896846886190934e-05, "loss": 0.7413, "step": 25260 }, { "epoch": 2.21, "learning_rate": 3.896410166826797e-05, "loss": 0.9761, "step": 25270 }, { "epoch": 2.21, "learning_rate": 3.895973447462661e-05, "loss": 0.9094, "step": 25280 }, { "epoch": 2.21, "learning_rate": 3.8955367280985236e-05, "loss": 0.8208, "step": 25290 }, { "epoch": 2.21, "learning_rate": 3.8951000087343876e-05, "loss": 0.879, "step": 25300 }, { "epoch": 2.21, "learning_rate": 3.894663289370251e-05, "loss": 0.8391, "step": 25310 }, { "epoch": 2.21, "learning_rate": 3.894226570006114e-05, "loss": 0.9105, "step": 25320 }, { "epoch": 2.21, "learning_rate": 3.8937898506419776e-05, "loss": 1.027, "step": 25330 }, { "epoch": 2.21, "learning_rate": 3.893353131277841e-05, "loss": 0.8306, "step": 25340 }, { "epoch": 2.21, "learning_rate": 3.892916411913705e-05, "loss": 0.9332, "step": 25350 }, { "epoch": 2.22, "learning_rate": 3.8924796925495675e-05, "loss": 0.8374, "step": 25360 }, { "epoch": 2.22, "learning_rate": 3.8920429731854315e-05, "loss": 0.8523, "step": 25370 }, { "epoch": 2.22, "learning_rate": 3.891606253821294e-05, "loss": 0.9511, "step": 25380 }, { "epoch": 2.22, "learning_rate": 3.891169534457158e-05, "loss": 0.7786, "step": 25390 }, { "epoch": 2.22, "learning_rate": 3.8907328150930215e-05, "loss": 0.9563, "step": 25400 }, { "epoch": 2.22, "learning_rate": 3.890296095728885e-05, "loss": 0.9415, "step": 25410 }, { "epoch": 2.22, "learning_rate": 3.889859376364748e-05, "loss": 0.865, "step": 25420 }, { "epoch": 2.22, "learning_rate": 3.8894226570006115e-05, "loss": 0.927, "step": 25430 }, { "epoch": 2.22, "learning_rate": 3.8889859376364755e-05, "loss": 0.9167, "step": 25440 }, { "epoch": 2.22, "learning_rate": 3.888549218272338e-05, "loss": 0.7889, "step": 25450 }, { "epoch": 2.22, "learning_rate": 3.888112498908202e-05, "loss": 0.8907, "step": 25460 }, { "epoch": 2.22, "learning_rate": 3.887675779544065e-05, "loss": 0.9217, "step": 25470 }, { "epoch": 2.23, "learning_rate": 3.887239060179929e-05, "loss": 0.9181, "step": 25480 }, { "epoch": 2.23, "learning_rate": 3.886802340815792e-05, "loss": 0.9643, "step": 25490 }, { "epoch": 2.23, "learning_rate": 3.8863656214516554e-05, "loss": 0.9515, "step": 25500 }, { "epoch": 2.23, "learning_rate": 3.885928902087519e-05, "loss": 0.8572, "step": 25510 }, { "epoch": 2.23, "learning_rate": 3.885492182723382e-05, "loss": 0.9012, "step": 25520 }, { "epoch": 2.23, "learning_rate": 3.885055463359245e-05, "loss": 0.7615, "step": 25530 }, { "epoch": 2.23, "learning_rate": 3.8846187439951087e-05, "loss": 0.7784, "step": 25540 }, { "epoch": 2.23, "learning_rate": 3.8841820246309727e-05, "loss": 0.9518, "step": 25550 }, { "epoch": 2.23, "learning_rate": 3.883745305266835e-05, "loss": 0.9307, "step": 25560 }, { "epoch": 2.23, "learning_rate": 3.883308585902699e-05, "loss": 0.9266, "step": 25570 }, { "epoch": 2.23, "learning_rate": 3.8828718665385626e-05, "loss": 1.0072, "step": 25580 }, { "epoch": 2.24, "learning_rate": 3.882435147174426e-05, "loss": 0.8514, "step": 25590 }, { "epoch": 2.24, "learning_rate": 3.881998427810289e-05, "loss": 0.8797, "step": 25600 }, { "epoch": 2.24, "learning_rate": 3.8815617084461526e-05, "loss": 0.9151, "step": 25610 }, { "epoch": 2.24, "learning_rate": 3.881124989082016e-05, "loss": 0.7862, "step": 25620 }, { "epoch": 2.24, "learning_rate": 3.880688269717879e-05, "loss": 1.0018, "step": 25630 }, { "epoch": 2.24, "learning_rate": 3.880251550353743e-05, "loss": 0.8088, "step": 25640 }, { "epoch": 2.24, "learning_rate": 3.8798148309896065e-05, "loss": 0.9481, "step": 25650 }, { "epoch": 2.24, "learning_rate": 3.87937811162547e-05, "loss": 0.854, "step": 25660 }, { "epoch": 2.24, "learning_rate": 3.878941392261333e-05, "loss": 0.8865, "step": 25670 }, { "epoch": 2.24, "learning_rate": 3.8785046728971965e-05, "loss": 0.8719, "step": 25680 }, { "epoch": 2.24, "learning_rate": 3.87806795353306e-05, "loss": 0.783, "step": 25690 }, { "epoch": 2.24, "learning_rate": 3.877631234168923e-05, "loss": 0.9409, "step": 25700 }, { "epoch": 2.25, "learning_rate": 3.8771945148047865e-05, "loss": 0.9009, "step": 25710 }, { "epoch": 2.25, "learning_rate": 3.87675779544065e-05, "loss": 0.9481, "step": 25720 }, { "epoch": 2.25, "learning_rate": 3.876321076076513e-05, "loss": 0.783, "step": 25730 }, { "epoch": 2.25, "learning_rate": 3.875884356712377e-05, "loss": 0.8429, "step": 25740 }, { "epoch": 2.25, "learning_rate": 3.8754476373482404e-05, "loss": 0.9001, "step": 25750 }, { "epoch": 2.25, "learning_rate": 3.875010917984104e-05, "loss": 0.8734, "step": 25760 }, { "epoch": 2.25, "learning_rate": 3.874574198619967e-05, "loss": 0.9672, "step": 25770 }, { "epoch": 2.25, "learning_rate": 3.8741374792558304e-05, "loss": 0.8796, "step": 25780 }, { "epoch": 2.25, "learning_rate": 3.873700759891694e-05, "loss": 0.8603, "step": 25790 }, { "epoch": 2.25, "learning_rate": 3.873264040527557e-05, "loss": 0.8875, "step": 25800 }, { "epoch": 2.25, "learning_rate": 3.872827321163421e-05, "loss": 0.9671, "step": 25810 }, { "epoch": 2.26, "learning_rate": 3.8723906017992837e-05, "loss": 0.9109, "step": 25820 }, { "epoch": 2.26, "learning_rate": 3.8719538824351477e-05, "loss": 0.8276, "step": 25830 }, { "epoch": 2.26, "learning_rate": 3.871517163071011e-05, "loss": 1.024, "step": 25840 }, { "epoch": 2.26, "learning_rate": 3.871080443706874e-05, "loss": 0.892, "step": 25850 }, { "epoch": 2.26, "learning_rate": 3.8706437243427376e-05, "loss": 0.917, "step": 25860 }, { "epoch": 2.26, "learning_rate": 3.870207004978601e-05, "loss": 0.8997, "step": 25870 }, { "epoch": 2.26, "learning_rate": 3.869770285614464e-05, "loss": 0.8367, "step": 25880 }, { "epoch": 2.26, "learning_rate": 3.8693335662503276e-05, "loss": 0.8792, "step": 25890 }, { "epoch": 2.26, "learning_rate": 3.8688968468861916e-05, "loss": 0.9476, "step": 25900 }, { "epoch": 2.26, "learning_rate": 3.868460127522054e-05, "loss": 0.9277, "step": 25910 }, { "epoch": 2.26, "learning_rate": 3.868023408157918e-05, "loss": 0.8288, "step": 25920 }, { "epoch": 2.26, "learning_rate": 3.867586688793781e-05, "loss": 0.9404, "step": 25930 }, { "epoch": 2.27, "learning_rate": 3.867149969429645e-05, "loss": 0.9071, "step": 25940 }, { "epoch": 2.27, "learning_rate": 3.866713250065508e-05, "loss": 0.856, "step": 25950 }, { "epoch": 2.27, "learning_rate": 3.8662765307013715e-05, "loss": 0.8676, "step": 25960 }, { "epoch": 2.27, "learning_rate": 3.865839811337235e-05, "loss": 0.8512, "step": 25970 }, { "epoch": 2.27, "learning_rate": 3.865403091973098e-05, "loss": 0.8674, "step": 25980 }, { "epoch": 2.27, "learning_rate": 3.864966372608962e-05, "loss": 0.9658, "step": 25990 }, { "epoch": 2.27, "learning_rate": 3.864529653244825e-05, "loss": 0.9211, "step": 26000 }, { "epoch": 2.27, "learning_rate": 3.864092933880689e-05, "loss": 0.9245, "step": 26010 }, { "epoch": 2.27, "learning_rate": 3.8636562145165514e-05, "loss": 0.8983, "step": 26020 }, { "epoch": 2.27, "learning_rate": 3.8632194951524154e-05, "loss": 0.8051, "step": 26030 }, { "epoch": 2.27, "learning_rate": 3.862782775788279e-05, "loss": 0.8324, "step": 26040 }, { "epoch": 2.28, "learning_rate": 3.862346056424142e-05, "loss": 0.9257, "step": 26050 }, { "epoch": 2.28, "learning_rate": 3.8619093370600054e-05, "loss": 0.9909, "step": 26060 }, { "epoch": 2.28, "learning_rate": 3.861472617695869e-05, "loss": 0.8704, "step": 26070 }, { "epoch": 2.28, "learning_rate": 3.861035898331733e-05, "loss": 0.7843, "step": 26080 }, { "epoch": 2.28, "learning_rate": 3.8605991789675953e-05, "loss": 0.9616, "step": 26090 }, { "epoch": 2.28, "learning_rate": 3.8601624596034593e-05, "loss": 0.798, "step": 26100 }, { "epoch": 2.28, "learning_rate": 3.859725740239322e-05, "loss": 0.9102, "step": 26110 }, { "epoch": 2.28, "learning_rate": 3.859289020875186e-05, "loss": 0.9212, "step": 26120 }, { "epoch": 2.28, "learning_rate": 3.8588523015110486e-05, "loss": 0.8784, "step": 26130 }, { "epoch": 2.28, "learning_rate": 3.8584155821469126e-05, "loss": 0.8685, "step": 26140 }, { "epoch": 2.28, "learning_rate": 3.857978862782776e-05, "loss": 0.9278, "step": 26150 }, { "epoch": 2.28, "learning_rate": 3.857542143418639e-05, "loss": 0.836, "step": 26160 }, { "epoch": 2.29, "learning_rate": 3.857105424054503e-05, "loss": 0.9116, "step": 26170 }, { "epoch": 2.29, "learning_rate": 3.856668704690366e-05, "loss": 0.8643, "step": 26180 }, { "epoch": 2.29, "learning_rate": 3.85623198532623e-05, "loss": 1.0034, "step": 26190 }, { "epoch": 2.29, "learning_rate": 3.8557952659620925e-05, "loss": 0.9029, "step": 26200 }, { "epoch": 2.29, "learning_rate": 3.8553585465979565e-05, "loss": 0.9276, "step": 26210 }, { "epoch": 2.29, "learning_rate": 3.85492182723382e-05, "loss": 0.8415, "step": 26220 }, { "epoch": 2.29, "learning_rate": 3.854485107869683e-05, "loss": 0.9597, "step": 26230 }, { "epoch": 2.29, "learning_rate": 3.8540483885055465e-05, "loss": 0.8595, "step": 26240 }, { "epoch": 2.29, "learning_rate": 3.85361166914141e-05, "loss": 0.8665, "step": 26250 }, { "epoch": 2.29, "learning_rate": 3.853174949777273e-05, "loss": 0.8617, "step": 26260 }, { "epoch": 2.29, "learning_rate": 3.8527382304131365e-05, "loss": 0.8992, "step": 26270 }, { "epoch": 2.3, "learning_rate": 3.8523015110490005e-05, "loss": 0.8361, "step": 26280 }, { "epoch": 2.3, "learning_rate": 3.851864791684863e-05, "loss": 0.8541, "step": 26290 }, { "epoch": 2.3, "learning_rate": 3.851428072320727e-05, "loss": 0.9196, "step": 26300 }, { "epoch": 2.3, "learning_rate": 3.8509913529565904e-05, "loss": 0.9852, "step": 26310 }, { "epoch": 2.3, "learning_rate": 3.850554633592454e-05, "loss": 0.8888, "step": 26320 }, { "epoch": 2.3, "learning_rate": 3.850117914228317e-05, "loss": 1.0016, "step": 26330 }, { "epoch": 2.3, "learning_rate": 3.8496811948641804e-05, "loss": 0.9295, "step": 26340 }, { "epoch": 2.3, "learning_rate": 3.849244475500044e-05, "loss": 0.9878, "step": 26350 }, { "epoch": 2.3, "learning_rate": 3.848807756135907e-05, "loss": 0.9318, "step": 26360 }, { "epoch": 2.3, "learning_rate": 3.848371036771771e-05, "loss": 0.8953, "step": 26370 }, { "epoch": 2.3, "learning_rate": 3.847934317407634e-05, "loss": 0.9807, "step": 26380 }, { "epoch": 2.31, "learning_rate": 3.8474975980434977e-05, "loss": 0.867, "step": 26390 }, { "epoch": 2.31, "learning_rate": 3.847060878679361e-05, "loss": 0.9032, "step": 26400 }, { "epoch": 2.31, "learning_rate": 3.846624159315224e-05, "loss": 0.7552, "step": 26410 }, { "epoch": 2.31, "learning_rate": 3.8461874399510876e-05, "loss": 0.9564, "step": 26420 }, { "epoch": 2.31, "learning_rate": 3.845750720586951e-05, "loss": 0.9544, "step": 26430 }, { "epoch": 2.31, "learning_rate": 3.845314001222814e-05, "loss": 0.8537, "step": 26440 }, { "epoch": 2.31, "learning_rate": 3.8448772818586776e-05, "loss": 0.7866, "step": 26450 }, { "epoch": 2.31, "learning_rate": 3.844440562494541e-05, "loss": 0.7853, "step": 26460 }, { "epoch": 2.31, "learning_rate": 3.844003843130405e-05, "loss": 0.8332, "step": 26470 }, { "epoch": 2.31, "learning_rate": 3.843567123766268e-05, "loss": 0.7433, "step": 26480 }, { "epoch": 2.31, "learning_rate": 3.8431304044021315e-05, "loss": 0.873, "step": 26490 }, { "epoch": 2.31, "learning_rate": 3.842693685037995e-05, "loss": 0.907, "step": 26500 }, { "epoch": 2.32, "learning_rate": 3.842256965673858e-05, "loss": 0.8519, "step": 26510 }, { "epoch": 2.32, "learning_rate": 3.8418202463097215e-05, "loss": 0.9365, "step": 26520 }, { "epoch": 2.32, "learning_rate": 3.841383526945585e-05, "loss": 0.9625, "step": 26530 }, { "epoch": 2.32, "learning_rate": 3.840946807581448e-05, "loss": 0.9123, "step": 26540 }, { "epoch": 2.32, "learning_rate": 3.8405100882173115e-05, "loss": 0.883, "step": 26550 }, { "epoch": 2.32, "learning_rate": 3.8400733688531755e-05, "loss": 1.0301, "step": 26560 }, { "epoch": 2.32, "learning_rate": 3.839636649489039e-05, "loss": 0.8476, "step": 26570 }, { "epoch": 2.32, "learning_rate": 3.839199930124902e-05, "loss": 0.8638, "step": 26580 }, { "epoch": 2.32, "learning_rate": 3.8387632107607654e-05, "loss": 0.8378, "step": 26590 }, { "epoch": 2.32, "learning_rate": 3.838326491396629e-05, "loss": 0.8649, "step": 26600 }, { "epoch": 2.32, "learning_rate": 3.837889772032492e-05, "loss": 0.869, "step": 26610 }, { "epoch": 2.33, "learning_rate": 3.8374530526683554e-05, "loss": 0.9053, "step": 26620 }, { "epoch": 2.33, "learning_rate": 3.8370163333042194e-05, "loss": 0.9383, "step": 26630 }, { "epoch": 2.33, "learning_rate": 3.836579613940082e-05, "loss": 0.7857, "step": 26640 }, { "epoch": 2.33, "learning_rate": 3.836142894575946e-05, "loss": 0.8657, "step": 26650 }, { "epoch": 2.33, "learning_rate": 3.835706175211809e-05, "loss": 0.9213, "step": 26660 }, { "epoch": 2.33, "learning_rate": 3.835269455847673e-05, "loss": 0.9264, "step": 26670 }, { "epoch": 2.33, "learning_rate": 3.834832736483536e-05, "loss": 1.0045, "step": 26680 }, { "epoch": 2.33, "learning_rate": 3.834396017119399e-05, "loss": 0.9877, "step": 26690 }, { "epoch": 2.33, "learning_rate": 3.8339592977552626e-05, "loss": 0.9406, "step": 26700 }, { "epoch": 2.33, "learning_rate": 3.833522578391126e-05, "loss": 0.7996, "step": 26710 }, { "epoch": 2.33, "learning_rate": 3.83308585902699e-05, "loss": 0.9746, "step": 26720 }, { "epoch": 2.33, "learning_rate": 3.8326491396628526e-05, "loss": 0.8804, "step": 26730 }, { "epoch": 2.34, "learning_rate": 3.8322124202987166e-05, "loss": 0.8101, "step": 26740 }, { "epoch": 2.34, "learning_rate": 3.831775700934579e-05, "loss": 0.9151, "step": 26750 }, { "epoch": 2.34, "learning_rate": 3.831338981570443e-05, "loss": 0.9393, "step": 26760 }, { "epoch": 2.34, "learning_rate": 3.8309022622063065e-05, "loss": 0.9434, "step": 26770 }, { "epoch": 2.34, "learning_rate": 3.83046554284217e-05, "loss": 0.9038, "step": 26780 }, { "epoch": 2.34, "learning_rate": 3.830028823478033e-05, "loss": 0.939, "step": 26790 }, { "epoch": 2.34, "learning_rate": 3.8295921041138965e-05, "loss": 0.7909, "step": 26800 }, { "epoch": 2.34, "learning_rate": 3.8291553847497605e-05, "loss": 0.8198, "step": 26810 }, { "epoch": 2.34, "learning_rate": 3.828718665385623e-05, "loss": 0.9151, "step": 26820 }, { "epoch": 2.34, "learning_rate": 3.828281946021487e-05, "loss": 0.8811, "step": 26830 }, { "epoch": 2.34, "learning_rate": 3.82784522665735e-05, "loss": 0.9721, "step": 26840 }, { "epoch": 2.35, "learning_rate": 3.827408507293214e-05, "loss": 0.824, "step": 26850 }, { "epoch": 2.35, "learning_rate": 3.8269717879290764e-05, "loss": 0.9274, "step": 26860 }, { "epoch": 2.35, "learning_rate": 3.8265350685649404e-05, "loss": 0.9757, "step": 26870 }, { "epoch": 2.35, "learning_rate": 3.826098349200804e-05, "loss": 0.9253, "step": 26880 }, { "epoch": 2.35, "learning_rate": 3.825661629836667e-05, "loss": 0.8558, "step": 26890 }, { "epoch": 2.35, "learning_rate": 3.825224910472531e-05, "loss": 0.9289, "step": 26900 }, { "epoch": 2.35, "learning_rate": 3.824788191108394e-05, "loss": 0.9817, "step": 26910 }, { "epoch": 2.35, "learning_rate": 3.824351471744258e-05, "loss": 0.8305, "step": 26920 }, { "epoch": 2.35, "learning_rate": 3.8239147523801203e-05, "loss": 0.8045, "step": 26930 }, { "epoch": 2.35, "learning_rate": 3.8234780330159843e-05, "loss": 0.8044, "step": 26940 }, { "epoch": 2.35, "learning_rate": 3.823041313651847e-05, "loss": 0.874, "step": 26950 }, { "epoch": 2.35, "learning_rate": 3.822604594287711e-05, "loss": 0.8977, "step": 26960 }, { "epoch": 2.36, "learning_rate": 3.822167874923574e-05, "loss": 0.8725, "step": 26970 }, { "epoch": 2.36, "learning_rate": 3.8217311555594376e-05, "loss": 1.0004, "step": 26980 }, { "epoch": 2.36, "learning_rate": 3.821294436195301e-05, "loss": 0.9293, "step": 26990 }, { "epoch": 2.36, "learning_rate": 3.820857716831164e-05, "loss": 0.7762, "step": 27000 }, { "epoch": 2.36, "learning_rate": 3.820420997467028e-05, "loss": 0.8381, "step": 27010 }, { "epoch": 2.36, "learning_rate": 3.819984278102891e-05, "loss": 0.9129, "step": 27020 }, { "epoch": 2.36, "learning_rate": 3.819547558738755e-05, "loss": 0.9258, "step": 27030 }, { "epoch": 2.36, "learning_rate": 3.8191108393746175e-05, "loss": 0.9834, "step": 27040 }, { "epoch": 2.36, "learning_rate": 3.8186741200104815e-05, "loss": 0.9089, "step": 27050 }, { "epoch": 2.36, "learning_rate": 3.818237400646345e-05, "loss": 0.9071, "step": 27060 }, { "epoch": 2.36, "learning_rate": 3.817800681282208e-05, "loss": 0.8775, "step": 27070 }, { "epoch": 2.37, "learning_rate": 3.8173639619180715e-05, "loss": 0.8611, "step": 27080 }, { "epoch": 2.37, "learning_rate": 3.816927242553935e-05, "loss": 0.9264, "step": 27090 }, { "epoch": 2.37, "learning_rate": 3.816490523189799e-05, "loss": 0.8804, "step": 27100 }, { "epoch": 2.37, "learning_rate": 3.8160538038256615e-05, "loss": 0.8369, "step": 27110 }, { "epoch": 2.37, "learning_rate": 3.8156170844615255e-05, "loss": 0.8099, "step": 27120 }, { "epoch": 2.37, "learning_rate": 3.815180365097389e-05, "loss": 0.8842, "step": 27130 }, { "epoch": 2.37, "learning_rate": 3.814743645733252e-05, "loss": 0.908, "step": 27140 }, { "epoch": 2.37, "learning_rate": 3.8143069263691154e-05, "loss": 0.9624, "step": 27150 }, { "epoch": 2.37, "learning_rate": 3.813870207004979e-05, "loss": 0.8502, "step": 27160 }, { "epoch": 2.37, "learning_rate": 3.813433487640842e-05, "loss": 0.8926, "step": 27170 }, { "epoch": 2.37, "learning_rate": 3.8129967682767054e-05, "loss": 0.9042, "step": 27180 }, { "epoch": 2.37, "learning_rate": 3.812560048912569e-05, "loss": 0.8802, "step": 27190 }, { "epoch": 2.38, "learning_rate": 3.812123329548432e-05, "loss": 0.9184, "step": 27200 }, { "epoch": 2.38, "learning_rate": 3.811686610184296e-05, "loss": 0.9333, "step": 27210 }, { "epoch": 2.38, "learning_rate": 3.8112498908201593e-05, "loss": 0.9385, "step": 27220 }, { "epoch": 2.38, "learning_rate": 3.810813171456023e-05, "loss": 0.8899, "step": 27230 }, { "epoch": 2.38, "learning_rate": 3.810376452091886e-05, "loss": 0.9678, "step": 27240 }, { "epoch": 2.38, "learning_rate": 3.809939732727749e-05, "loss": 0.852, "step": 27250 }, { "epoch": 2.38, "learning_rate": 3.8095030133636126e-05, "loss": 0.841, "step": 27260 }, { "epoch": 2.38, "learning_rate": 3.809066293999476e-05, "loss": 0.9297, "step": 27270 }, { "epoch": 2.38, "learning_rate": 3.808629574635339e-05, "loss": 0.9052, "step": 27280 }, { "epoch": 2.38, "learning_rate": 3.808192855271203e-05, "loss": 0.9502, "step": 27290 }, { "epoch": 2.38, "learning_rate": 3.8077561359070666e-05, "loss": 0.7993, "step": 27300 }, { "epoch": 2.39, "learning_rate": 3.80731941654293e-05, "loss": 0.8459, "step": 27310 }, { "epoch": 2.39, "learning_rate": 3.806882697178793e-05, "loss": 0.9762, "step": 27320 }, { "epoch": 2.39, "learning_rate": 3.8064459778146565e-05, "loss": 0.9187, "step": 27330 }, { "epoch": 2.39, "learning_rate": 3.80600925845052e-05, "loss": 0.9693, "step": 27340 }, { "epoch": 2.39, "learning_rate": 3.805572539086383e-05, "loss": 0.8865, "step": 27350 }, { "epoch": 2.39, "learning_rate": 3.8051358197222465e-05, "loss": 0.8245, "step": 27360 }, { "epoch": 2.39, "learning_rate": 3.80469910035811e-05, "loss": 0.8544, "step": 27370 }, { "epoch": 2.39, "learning_rate": 3.804262380993974e-05, "loss": 0.9236, "step": 27380 }, { "epoch": 2.39, "learning_rate": 3.8038256616298365e-05, "loss": 0.9529, "step": 27390 }, { "epoch": 2.39, "learning_rate": 3.8033889422657005e-05, "loss": 0.9051, "step": 27400 }, { "epoch": 2.39, "learning_rate": 3.802952222901564e-05, "loss": 0.8557, "step": 27410 }, { "epoch": 2.39, "learning_rate": 3.802515503537427e-05, "loss": 0.9562, "step": 27420 }, { "epoch": 2.4, "learning_rate": 3.8020787841732904e-05, "loss": 0.9046, "step": 27430 }, { "epoch": 2.4, "learning_rate": 3.801642064809154e-05, "loss": 0.9238, "step": 27440 }, { "epoch": 2.4, "learning_rate": 3.801205345445017e-05, "loss": 0.9644, "step": 27450 }, { "epoch": 2.4, "learning_rate": 3.8007686260808804e-05, "loss": 0.954, "step": 27460 }, { "epoch": 2.4, "learning_rate": 3.8003319067167444e-05, "loss": 0.8797, "step": 27470 }, { "epoch": 2.4, "learning_rate": 3.799895187352607e-05, "loss": 0.9005, "step": 27480 }, { "epoch": 2.4, "learning_rate": 3.799458467988471e-05, "loss": 0.8653, "step": 27490 }, { "epoch": 2.4, "learning_rate": 3.7990217486243343e-05, "loss": 0.8788, "step": 27500 }, { "epoch": 2.4, "learning_rate": 3.798585029260198e-05, "loss": 0.9159, "step": 27510 }, { "epoch": 2.4, "learning_rate": 3.798148309896061e-05, "loss": 0.881, "step": 27520 }, { "epoch": 2.4, "learning_rate": 3.797711590531924e-05, "loss": 0.9706, "step": 27530 }, { "epoch": 2.41, "learning_rate": 3.797274871167788e-05, "loss": 0.9096, "step": 27540 }, { "epoch": 2.41, "learning_rate": 3.796838151803651e-05, "loss": 0.8772, "step": 27550 }, { "epoch": 2.41, "learning_rate": 3.796401432439515e-05, "loss": 0.8083, "step": 27560 }, { "epoch": 2.41, "learning_rate": 3.7959647130753776e-05, "loss": 0.9359, "step": 27570 }, { "epoch": 2.41, "learning_rate": 3.7955279937112416e-05, "loss": 0.8518, "step": 27580 }, { "epoch": 2.41, "learning_rate": 3.795091274347104e-05, "loss": 0.9001, "step": 27590 }, { "epoch": 2.41, "learning_rate": 3.794654554982968e-05, "loss": 0.8307, "step": 27600 }, { "epoch": 2.41, "learning_rate": 3.7942178356188315e-05, "loss": 0.8333, "step": 27610 }, { "epoch": 2.41, "learning_rate": 3.793781116254695e-05, "loss": 0.8095, "step": 27620 }, { "epoch": 2.41, "learning_rate": 3.793344396890559e-05, "loss": 0.9528, "step": 27630 }, { "epoch": 2.41, "learning_rate": 3.7929076775264215e-05, "loss": 0.9253, "step": 27640 }, { "epoch": 2.42, "learning_rate": 3.7924709581622855e-05, "loss": 0.8031, "step": 27650 }, { "epoch": 2.42, "learning_rate": 3.792034238798148e-05, "loss": 1.0076, "step": 27660 }, { "epoch": 2.42, "learning_rate": 3.791597519434012e-05, "loss": 0.9623, "step": 27670 }, { "epoch": 2.42, "learning_rate": 3.791160800069875e-05, "loss": 0.8274, "step": 27680 }, { "epoch": 2.42, "learning_rate": 3.790724080705739e-05, "loss": 0.9221, "step": 27690 }, { "epoch": 2.42, "learning_rate": 3.790287361341602e-05, "loss": 0.8816, "step": 27700 }, { "epoch": 2.42, "learning_rate": 3.7898506419774654e-05, "loss": 0.9172, "step": 27710 }, { "epoch": 2.42, "learning_rate": 3.789413922613329e-05, "loss": 0.8602, "step": 27720 }, { "epoch": 2.42, "learning_rate": 3.788977203249192e-05, "loss": 0.8672, "step": 27730 }, { "epoch": 2.42, "learning_rate": 3.788540483885056e-05, "loss": 0.8402, "step": 27740 }, { "epoch": 2.42, "learning_rate": 3.788103764520919e-05, "loss": 0.9284, "step": 27750 }, { "epoch": 2.42, "learning_rate": 3.787667045156783e-05, "loss": 0.9028, "step": 27760 }, { "epoch": 2.43, "learning_rate": 3.7872303257926453e-05, "loss": 0.9364, "step": 27770 }, { "epoch": 2.43, "learning_rate": 3.7867936064285093e-05, "loss": 0.8405, "step": 27780 }, { "epoch": 2.43, "learning_rate": 3.786356887064373e-05, "loss": 0.8595, "step": 27790 }, { "epoch": 2.43, "learning_rate": 3.785920167700236e-05, "loss": 0.944, "step": 27800 }, { "epoch": 2.43, "learning_rate": 3.785483448336099e-05, "loss": 0.9186, "step": 27810 }, { "epoch": 2.43, "learning_rate": 3.7850467289719626e-05, "loss": 0.8419, "step": 27820 }, { "epoch": 2.43, "learning_rate": 3.7846100096078266e-05, "loss": 0.9227, "step": 27830 }, { "epoch": 2.43, "learning_rate": 3.784173290243689e-05, "loss": 0.9303, "step": 27840 }, { "epoch": 2.43, "learning_rate": 3.783736570879553e-05, "loss": 0.78, "step": 27850 }, { "epoch": 2.43, "learning_rate": 3.783299851515416e-05, "loss": 0.8949, "step": 27860 }, { "epoch": 2.43, "learning_rate": 3.78286313215128e-05, "loss": 0.76, "step": 27870 }, { "epoch": 2.44, "learning_rate": 3.782426412787143e-05, "loss": 0.8809, "step": 27880 }, { "epoch": 2.44, "learning_rate": 3.7819896934230065e-05, "loss": 0.8449, "step": 27890 }, { "epoch": 2.44, "learning_rate": 3.78155297405887e-05, "loss": 1.0643, "step": 27900 }, { "epoch": 2.44, "learning_rate": 3.781116254694733e-05, "loss": 1.0307, "step": 27910 }, { "epoch": 2.44, "learning_rate": 3.7806795353305965e-05, "loss": 0.8784, "step": 27920 }, { "epoch": 2.44, "learning_rate": 3.78024281596646e-05, "loss": 0.8797, "step": 27930 }, { "epoch": 2.44, "learning_rate": 3.779806096602324e-05, "loss": 0.8203, "step": 27940 }, { "epoch": 2.44, "learning_rate": 3.779369377238187e-05, "loss": 0.8096, "step": 27950 }, { "epoch": 2.44, "learning_rate": 3.7789326578740505e-05, "loss": 0.8641, "step": 27960 }, { "epoch": 2.44, "learning_rate": 3.778495938509914e-05, "loss": 0.832, "step": 27970 }, { "epoch": 2.44, "learning_rate": 3.778059219145777e-05, "loss": 0.9069, "step": 27980 }, { "epoch": 2.44, "learning_rate": 3.7776224997816404e-05, "loss": 0.8991, "step": 27990 }, { "epoch": 2.45, "learning_rate": 3.777185780417504e-05, "loss": 0.9697, "step": 28000 }, { "epoch": 2.45, "learning_rate": 3.776749061053367e-05, "loss": 0.9659, "step": 28010 }, { "epoch": 2.45, "learning_rate": 3.7763123416892304e-05, "loss": 0.8802, "step": 28020 }, { "epoch": 2.45, "learning_rate": 3.7758756223250944e-05, "loss": 0.8412, "step": 28030 }, { "epoch": 2.45, "learning_rate": 3.775438902960958e-05, "loss": 0.8759, "step": 28040 }, { "epoch": 2.45, "learning_rate": 3.775002183596821e-05, "loss": 1.0513, "step": 28050 }, { "epoch": 2.45, "learning_rate": 3.7745654642326843e-05, "loss": 0.8851, "step": 28060 }, { "epoch": 2.45, "learning_rate": 3.774128744868548e-05, "loss": 0.965, "step": 28070 }, { "epoch": 2.45, "learning_rate": 3.773692025504411e-05, "loss": 0.8704, "step": 28080 }, { "epoch": 2.45, "learning_rate": 3.773255306140274e-05, "loss": 0.85, "step": 28090 }, { "epoch": 2.45, "learning_rate": 3.7728185867761376e-05, "loss": 0.8991, "step": 28100 }, { "epoch": 2.46, "learning_rate": 3.7723818674120016e-05, "loss": 1.0349, "step": 28110 }, { "epoch": 2.46, "learning_rate": 3.771945148047864e-05, "loss": 0.8779, "step": 28120 }, { "epoch": 2.46, "learning_rate": 3.771508428683728e-05, "loss": 0.9126, "step": 28130 }, { "epoch": 2.46, "learning_rate": 3.7710717093195916e-05, "loss": 0.9072, "step": 28140 }, { "epoch": 2.46, "learning_rate": 3.770634989955455e-05, "loss": 0.8645, "step": 28150 }, { "epoch": 2.46, "learning_rate": 3.770198270591318e-05, "loss": 0.8625, "step": 28160 }, { "epoch": 2.46, "learning_rate": 3.7697615512271815e-05, "loss": 1.0313, "step": 28170 }, { "epoch": 2.46, "learning_rate": 3.769324831863045e-05, "loss": 0.8339, "step": 28180 }, { "epoch": 2.46, "learning_rate": 3.768888112498908e-05, "loss": 0.9292, "step": 28190 }, { "epoch": 2.46, "learning_rate": 3.768451393134772e-05, "loss": 0.9588, "step": 28200 }, { "epoch": 2.46, "learning_rate": 3.768014673770635e-05, "loss": 0.8291, "step": 28210 }, { "epoch": 2.46, "learning_rate": 3.767577954406499e-05, "loss": 0.8975, "step": 28220 }, { "epoch": 2.47, "learning_rate": 3.767141235042362e-05, "loss": 0.95, "step": 28230 }, { "epoch": 2.47, "learning_rate": 3.7667045156782255e-05, "loss": 0.8907, "step": 28240 }, { "epoch": 2.47, "learning_rate": 3.766267796314089e-05, "loss": 0.9476, "step": 28250 }, { "epoch": 2.47, "learning_rate": 3.765831076949952e-05, "loss": 0.8179, "step": 28260 }, { "epoch": 2.47, "learning_rate": 3.7653943575858154e-05, "loss": 0.8824, "step": 28270 }, { "epoch": 2.47, "learning_rate": 3.764957638221679e-05, "loss": 0.9275, "step": 28280 }, { "epoch": 2.47, "learning_rate": 3.764520918857543e-05, "loss": 0.8864, "step": 28290 }, { "epoch": 2.47, "learning_rate": 3.7640841994934054e-05, "loss": 0.8628, "step": 28300 }, { "epoch": 2.47, "learning_rate": 3.7636474801292694e-05, "loss": 0.9246, "step": 28310 }, { "epoch": 2.47, "learning_rate": 3.763210760765132e-05, "loss": 0.8165, "step": 28320 }, { "epoch": 2.47, "learning_rate": 3.762774041400996e-05, "loss": 0.8437, "step": 28330 }, { "epoch": 2.48, "learning_rate": 3.7623373220368593e-05, "loss": 0.9607, "step": 28340 }, { "epoch": 2.48, "learning_rate": 3.761900602672723e-05, "loss": 0.8957, "step": 28350 }, { "epoch": 2.48, "learning_rate": 3.761463883308587e-05, "loss": 0.9313, "step": 28360 }, { "epoch": 2.48, "learning_rate": 3.761027163944449e-05, "loss": 0.8727, "step": 28370 }, { "epoch": 2.48, "learning_rate": 3.760590444580313e-05, "loss": 0.8245, "step": 28380 }, { "epoch": 2.48, "learning_rate": 3.760153725216176e-05, "loss": 0.9247, "step": 28390 }, { "epoch": 2.48, "learning_rate": 3.75971700585204e-05, "loss": 0.8337, "step": 28400 }, { "epoch": 2.48, "learning_rate": 3.7592802864879026e-05, "loss": 0.909, "step": 28410 }, { "epoch": 2.48, "learning_rate": 3.7588435671237666e-05, "loss": 0.9037, "step": 28420 }, { "epoch": 2.48, "learning_rate": 3.75840684775963e-05, "loss": 1.0252, "step": 28430 }, { "epoch": 2.48, "learning_rate": 3.757970128395493e-05, "loss": 0.9458, "step": 28440 }, { "epoch": 2.48, "learning_rate": 3.7575334090313565e-05, "loss": 0.9845, "step": 28450 }, { "epoch": 2.49, "learning_rate": 3.75709668966722e-05, "loss": 0.8313, "step": 28460 }, { "epoch": 2.49, "learning_rate": 3.756659970303084e-05, "loss": 0.7355, "step": 28470 }, { "epoch": 2.49, "learning_rate": 3.7562232509389465e-05, "loss": 0.7191, "step": 28480 }, { "epoch": 2.49, "learning_rate": 3.7557865315748105e-05, "loss": 0.8746, "step": 28490 }, { "epoch": 2.49, "learning_rate": 3.755349812210673e-05, "loss": 0.8124, "step": 28500 }, { "epoch": 2.49, "learning_rate": 3.754913092846537e-05, "loss": 0.9495, "step": 28510 }, { "epoch": 2.49, "learning_rate": 3.7544763734824005e-05, "loss": 0.8358, "step": 28520 }, { "epoch": 2.49, "learning_rate": 3.754039654118264e-05, "loss": 0.8624, "step": 28530 }, { "epoch": 2.49, "learning_rate": 3.753602934754127e-05, "loss": 0.9682, "step": 28540 }, { "epoch": 2.49, "learning_rate": 3.7531662153899904e-05, "loss": 0.9362, "step": 28550 }, { "epoch": 2.49, "learning_rate": 3.7527294960258544e-05, "loss": 0.8355, "step": 28560 }, { "epoch": 2.5, "learning_rate": 3.752292776661717e-05, "loss": 0.7724, "step": 28570 }, { "epoch": 2.5, "learning_rate": 3.751856057297581e-05, "loss": 0.8557, "step": 28580 }, { "epoch": 2.5, "learning_rate": 3.751419337933444e-05, "loss": 0.8543, "step": 28590 }, { "epoch": 2.5, "learning_rate": 3.750982618569308e-05, "loss": 0.909, "step": 28600 }, { "epoch": 2.5, "learning_rate": 3.750545899205171e-05, "loss": 0.96, "step": 28610 }, { "epoch": 2.5, "learning_rate": 3.7501091798410343e-05, "loss": 0.9205, "step": 28620 }, { "epoch": 2.5, "learning_rate": 3.749672460476898e-05, "loss": 0.8027, "step": 28630 }, { "epoch": 2.5, "learning_rate": 3.749235741112761e-05, "loss": 0.8754, "step": 28640 }, { "epoch": 2.5, "learning_rate": 3.748799021748624e-05, "loss": 0.8994, "step": 28650 }, { "epoch": 2.5, "learning_rate": 3.7483623023844876e-05, "loss": 0.9041, "step": 28660 }, { "epoch": 2.5, "learning_rate": 3.7479255830203516e-05, "loss": 0.8703, "step": 28670 }, { "epoch": 2.51, "learning_rate": 3.747488863656214e-05, "loss": 0.9414, "step": 28680 }, { "epoch": 2.51, "learning_rate": 3.747052144292078e-05, "loss": 0.8962, "step": 28690 }, { "epoch": 2.51, "learning_rate": 3.7466154249279416e-05, "loss": 0.9463, "step": 28700 }, { "epoch": 2.51, "learning_rate": 3.746178705563805e-05, "loss": 0.9432, "step": 28710 }, { "epoch": 2.51, "learning_rate": 3.745741986199668e-05, "loss": 0.8816, "step": 28720 }, { "epoch": 2.51, "learning_rate": 3.7453052668355316e-05, "loss": 0.9267, "step": 28730 }, { "epoch": 2.51, "learning_rate": 3.744868547471395e-05, "loss": 0.9836, "step": 28740 }, { "epoch": 2.51, "learning_rate": 3.744431828107258e-05, "loss": 0.8484, "step": 28750 }, { "epoch": 2.51, "learning_rate": 3.743995108743122e-05, "loss": 0.9633, "step": 28760 }, { "epoch": 2.51, "learning_rate": 3.7435583893789855e-05, "loss": 0.8619, "step": 28770 }, { "epoch": 2.51, "learning_rate": 3.743121670014849e-05, "loss": 0.9429, "step": 28780 }, { "epoch": 2.51, "learning_rate": 3.742684950650712e-05, "loss": 0.895, "step": 28790 }, { "epoch": 2.52, "learning_rate": 3.7422482312865755e-05, "loss": 0.9302, "step": 28800 }, { "epoch": 2.52, "learning_rate": 3.741811511922439e-05, "loss": 0.8122, "step": 28810 }, { "epoch": 2.52, "learning_rate": 3.741374792558302e-05, "loss": 0.8599, "step": 28820 }, { "epoch": 2.52, "learning_rate": 3.7409380731941654e-05, "loss": 0.845, "step": 28830 }, { "epoch": 2.52, "learning_rate": 3.740501353830029e-05, "loss": 0.9022, "step": 28840 }, { "epoch": 2.52, "learning_rate": 3.740064634465892e-05, "loss": 0.8399, "step": 28850 }, { "epoch": 2.52, "learning_rate": 3.739627915101756e-05, "loss": 0.9031, "step": 28860 }, { "epoch": 2.52, "learning_rate": 3.7391911957376194e-05, "loss": 0.773, "step": 28870 }, { "epoch": 2.52, "learning_rate": 3.738754476373483e-05, "loss": 0.9786, "step": 28880 }, { "epoch": 2.52, "learning_rate": 3.738317757009346e-05, "loss": 0.9155, "step": 28890 }, { "epoch": 2.52, "learning_rate": 3.7378810376452094e-05, "loss": 0.8825, "step": 28900 }, { "epoch": 2.53, "learning_rate": 3.737444318281073e-05, "loss": 0.9342, "step": 28910 }, { "epoch": 2.53, "learning_rate": 3.737007598916936e-05, "loss": 0.9824, "step": 28920 }, { "epoch": 2.53, "learning_rate": 3.736570879552799e-05, "loss": 0.8604, "step": 28930 }, { "epoch": 2.53, "learning_rate": 3.7361341601886626e-05, "loss": 0.9313, "step": 28940 }, { "epoch": 2.53, "learning_rate": 3.7356974408245266e-05, "loss": 0.9212, "step": 28950 }, { "epoch": 2.53, "learning_rate": 3.73526072146039e-05, "loss": 0.9935, "step": 28960 }, { "epoch": 2.53, "learning_rate": 3.734824002096253e-05, "loss": 0.9001, "step": 28970 }, { "epoch": 2.53, "learning_rate": 3.7343872827321166e-05, "loss": 0.8225, "step": 28980 }, { "epoch": 2.53, "learning_rate": 3.73395056336798e-05, "loss": 0.8585, "step": 28990 }, { "epoch": 2.53, "learning_rate": 3.733513844003843e-05, "loss": 0.7686, "step": 29000 }, { "epoch": 2.53, "learning_rate": 3.7330771246397066e-05, "loss": 0.897, "step": 29010 }, { "epoch": 2.53, "learning_rate": 3.7326404052755705e-05, "loss": 0.9644, "step": 29020 }, { "epoch": 2.54, "learning_rate": 3.732203685911433e-05, "loss": 1.0603, "step": 29030 }, { "epoch": 2.54, "learning_rate": 3.731766966547297e-05, "loss": 0.9168, "step": 29040 }, { "epoch": 2.54, "learning_rate": 3.73133024718316e-05, "loss": 0.9792, "step": 29050 }, { "epoch": 2.54, "learning_rate": 3.730893527819024e-05, "loss": 0.9122, "step": 29060 }, { "epoch": 2.54, "learning_rate": 3.730456808454887e-05, "loss": 0.9485, "step": 29070 }, { "epoch": 2.54, "learning_rate": 3.7300200890907505e-05, "loss": 0.7853, "step": 29080 }, { "epoch": 2.54, "learning_rate": 3.729583369726614e-05, "loss": 0.8752, "step": 29090 }, { "epoch": 2.54, "learning_rate": 3.729146650362477e-05, "loss": 0.9133, "step": 29100 }, { "epoch": 2.54, "learning_rate": 3.728709930998341e-05, "loss": 0.8705, "step": 29110 }, { "epoch": 2.54, "learning_rate": 3.728273211634204e-05, "loss": 0.8165, "step": 29120 }, { "epoch": 2.54, "learning_rate": 3.727836492270068e-05, "loss": 0.862, "step": 29130 }, { "epoch": 2.55, "learning_rate": 3.7273997729059304e-05, "loss": 0.8964, "step": 29140 }, { "epoch": 2.55, "learning_rate": 3.7269630535417944e-05, "loss": 1.0126, "step": 29150 }, { "epoch": 2.55, "learning_rate": 3.726526334177658e-05, "loss": 0.8337, "step": 29160 }, { "epoch": 2.55, "learning_rate": 3.726089614813521e-05, "loss": 0.9428, "step": 29170 }, { "epoch": 2.55, "learning_rate": 3.7256528954493844e-05, "loss": 0.8714, "step": 29180 }, { "epoch": 2.55, "learning_rate": 3.725216176085248e-05, "loss": 0.8689, "step": 29190 }, { "epoch": 2.55, "learning_rate": 3.724779456721112e-05, "loss": 0.8388, "step": 29200 }, { "epoch": 2.55, "learning_rate": 3.724342737356974e-05, "loss": 0.8633, "step": 29210 }, { "epoch": 2.55, "learning_rate": 3.723906017992838e-05, "loss": 0.9142, "step": 29220 }, { "epoch": 2.55, "learning_rate": 3.723469298628701e-05, "loss": 0.8784, "step": 29230 }, { "epoch": 2.55, "learning_rate": 3.723032579264565e-05, "loss": 0.8121, "step": 29240 }, { "epoch": 2.55, "learning_rate": 3.7225958599004276e-05, "loss": 0.8571, "step": 29250 }, { "epoch": 2.56, "learning_rate": 3.7221591405362916e-05, "loss": 0.9242, "step": 29260 }, { "epoch": 2.56, "learning_rate": 3.721722421172155e-05, "loss": 0.9927, "step": 29270 }, { "epoch": 2.56, "learning_rate": 3.721285701808018e-05, "loss": 1.0068, "step": 29280 }, { "epoch": 2.56, "learning_rate": 3.720848982443882e-05, "loss": 0.9295, "step": 29290 }, { "epoch": 2.56, "learning_rate": 3.720412263079745e-05, "loss": 0.91, "step": 29300 }, { "epoch": 2.56, "learning_rate": 3.719975543715609e-05, "loss": 0.8985, "step": 29310 }, { "epoch": 2.56, "learning_rate": 3.7195388243514715e-05, "loss": 0.8521, "step": 29320 }, { "epoch": 2.56, "learning_rate": 3.7191021049873355e-05, "loss": 0.9149, "step": 29330 }, { "epoch": 2.56, "learning_rate": 3.718665385623198e-05, "loss": 0.8992, "step": 29340 }, { "epoch": 2.56, "learning_rate": 3.718228666259062e-05, "loss": 0.8932, "step": 29350 }, { "epoch": 2.56, "learning_rate": 3.7177919468949255e-05, "loss": 0.9405, "step": 29360 }, { "epoch": 2.57, "learning_rate": 3.717355227530789e-05, "loss": 0.8556, "step": 29370 }, { "epoch": 2.57, "learning_rate": 3.716918508166652e-05, "loss": 0.8299, "step": 29380 }, { "epoch": 2.57, "learning_rate": 3.7164817888025154e-05, "loss": 0.8066, "step": 29390 }, { "epoch": 2.57, "learning_rate": 3.7160450694383794e-05, "loss": 0.816, "step": 29400 }, { "epoch": 2.57, "learning_rate": 3.715608350074242e-05, "loss": 0.8753, "step": 29410 }, { "epoch": 2.57, "learning_rate": 3.715171630710106e-05, "loss": 0.8798, "step": 29420 }, { "epoch": 2.57, "learning_rate": 3.7147349113459694e-05, "loss": 0.9945, "step": 29430 }, { "epoch": 2.57, "learning_rate": 3.714298191981833e-05, "loss": 0.9417, "step": 29440 }, { "epoch": 2.57, "learning_rate": 3.713861472617696e-05, "loss": 0.9615, "step": 29450 }, { "epoch": 2.57, "learning_rate": 3.7134247532535594e-05, "loss": 0.8597, "step": 29460 }, { "epoch": 2.57, "learning_rate": 3.712988033889423e-05, "loss": 0.951, "step": 29470 }, { "epoch": 2.57, "learning_rate": 3.712551314525286e-05, "loss": 0.8804, "step": 29480 }, { "epoch": 2.58, "learning_rate": 3.71211459516115e-05, "loss": 0.9168, "step": 29490 }, { "epoch": 2.58, "learning_rate": 3.7116778757970126e-05, "loss": 0.8557, "step": 29500 }, { "epoch": 2.58, "learning_rate": 3.7112411564328766e-05, "loss": 0.8645, "step": 29510 }, { "epoch": 2.58, "learning_rate": 3.71080443706874e-05, "loss": 1.0844, "step": 29520 }, { "epoch": 2.58, "learning_rate": 3.710367717704603e-05, "loss": 0.8833, "step": 29530 }, { "epoch": 2.58, "learning_rate": 3.7099309983404666e-05, "loss": 0.7537, "step": 29540 }, { "epoch": 2.58, "learning_rate": 3.70949427897633e-05, "loss": 0.9232, "step": 29550 }, { "epoch": 2.58, "learning_rate": 3.709057559612193e-05, "loss": 0.8953, "step": 29560 }, { "epoch": 2.58, "learning_rate": 3.7086208402480566e-05, "loss": 0.8938, "step": 29570 }, { "epoch": 2.58, "learning_rate": 3.70818412088392e-05, "loss": 0.9393, "step": 29580 }, { "epoch": 2.58, "learning_rate": 3.707747401519784e-05, "loss": 0.8734, "step": 29590 }, { "epoch": 2.59, "learning_rate": 3.707310682155647e-05, "loss": 0.7744, "step": 29600 }, { "epoch": 2.59, "learning_rate": 3.7068739627915105e-05, "loss": 0.8491, "step": 29610 }, { "epoch": 2.59, "learning_rate": 3.706437243427374e-05, "loss": 0.9464, "step": 29620 }, { "epoch": 2.59, "learning_rate": 3.706000524063237e-05, "loss": 1.1008, "step": 29630 }, { "epoch": 2.59, "learning_rate": 3.7055638046991005e-05, "loss": 0.8729, "step": 29640 }, { "epoch": 2.59, "learning_rate": 3.705127085334964e-05, "loss": 1.0173, "step": 29650 }, { "epoch": 2.59, "learning_rate": 3.704690365970827e-05, "loss": 0.8467, "step": 29660 }, { "epoch": 2.59, "learning_rate": 3.7042536466066904e-05, "loss": 0.831, "step": 29670 }, { "epoch": 2.59, "learning_rate": 3.7038169272425544e-05, "loss": 0.8869, "step": 29680 }, { "epoch": 2.59, "learning_rate": 3.703380207878418e-05, "loss": 0.9159, "step": 29690 }, { "epoch": 2.59, "learning_rate": 3.702943488514281e-05, "loss": 0.7965, "step": 29700 }, { "epoch": 2.59, "learning_rate": 3.7025067691501444e-05, "loss": 0.8959, "step": 29710 }, { "epoch": 2.6, "learning_rate": 3.702070049786008e-05, "loss": 0.8991, "step": 29720 }, { "epoch": 2.6, "learning_rate": 3.701633330421871e-05, "loss": 0.7936, "step": 29730 }, { "epoch": 2.6, "learning_rate": 3.7011966110577344e-05, "loss": 0.7891, "step": 29740 }, { "epoch": 2.6, "learning_rate": 3.700759891693598e-05, "loss": 0.7465, "step": 29750 }, { "epoch": 2.6, "learning_rate": 3.700323172329461e-05, "loss": 0.9482, "step": 29760 }, { "epoch": 2.6, "learning_rate": 3.699886452965325e-05, "loss": 0.8593, "step": 29770 }, { "epoch": 2.6, "learning_rate": 3.6994497336011876e-05, "loss": 0.8967, "step": 29780 }, { "epoch": 2.6, "learning_rate": 3.6990130142370516e-05, "loss": 0.8984, "step": 29790 }, { "epoch": 2.6, "learning_rate": 3.698576294872915e-05, "loss": 0.7997, "step": 29800 }, { "epoch": 2.6, "learning_rate": 3.698139575508778e-05, "loss": 1.0243, "step": 29810 }, { "epoch": 2.6, "learning_rate": 3.6977028561446416e-05, "loss": 0.8945, "step": 29820 }, { "epoch": 2.61, "learning_rate": 3.697266136780505e-05, "loss": 0.824, "step": 29830 }, { "epoch": 2.61, "learning_rate": 3.696829417416369e-05, "loss": 0.8862, "step": 29840 }, { "epoch": 2.61, "learning_rate": 3.6963926980522316e-05, "loss": 0.9838, "step": 29850 }, { "epoch": 2.61, "learning_rate": 3.6959559786880956e-05, "loss": 0.8577, "step": 29860 }, { "epoch": 2.61, "learning_rate": 3.695519259323958e-05, "loss": 0.9283, "step": 29870 }, { "epoch": 2.61, "learning_rate": 3.695082539959822e-05, "loss": 0.844, "step": 29880 }, { "epoch": 2.61, "learning_rate": 3.6946458205956855e-05, "loss": 0.8483, "step": 29890 }, { "epoch": 2.61, "learning_rate": 3.694209101231549e-05, "loss": 0.9538, "step": 29900 }, { "epoch": 2.61, "learning_rate": 3.693772381867412e-05, "loss": 0.9602, "step": 29910 }, { "epoch": 2.61, "learning_rate": 3.6933356625032755e-05, "loss": 0.9144, "step": 29920 }, { "epoch": 2.61, "learning_rate": 3.6928989431391395e-05, "loss": 0.897, "step": 29930 }, { "epoch": 2.62, "learning_rate": 3.692462223775002e-05, "loss": 0.9545, "step": 29940 }, { "epoch": 2.62, "learning_rate": 3.692025504410866e-05, "loss": 0.929, "step": 29950 }, { "epoch": 2.62, "learning_rate": 3.691588785046729e-05, "loss": 0.9091, "step": 29960 }, { "epoch": 2.62, "learning_rate": 3.691152065682593e-05, "loss": 0.8406, "step": 29970 }, { "epoch": 2.62, "learning_rate": 3.6907153463184554e-05, "loss": 0.7906, "step": 29980 }, { "epoch": 2.62, "learning_rate": 3.6902786269543194e-05, "loss": 0.9229, "step": 29990 }, { "epoch": 2.62, "learning_rate": 3.689841907590183e-05, "loss": 0.7617, "step": 30000 }, { "epoch": 2.62, "eval_accuracy": 0.5697160263338902, "eval_loss": 0.8902987241744995, "eval_runtime": 84.089, "eval_samples_per_second": 121.027, "eval_steps_per_second": 15.139, "step": 30000 }, { "epoch": 2.62, "learning_rate": 3.689405188226046e-05, "loss": 0.8725, "step": 30010 }, { "epoch": 2.62, "learning_rate": 3.68896846886191e-05, "loss": 0.9209, "step": 30020 }, { "epoch": 2.62, "learning_rate": 3.688531749497773e-05, "loss": 0.8024, "step": 30030 }, { "epoch": 2.62, "learning_rate": 3.688095030133637e-05, "loss": 0.8148, "step": 30040 }, { "epoch": 2.62, "learning_rate": 3.687658310769499e-05, "loss": 0.9597, "step": 30050 }, { "epoch": 2.63, "learning_rate": 3.687221591405363e-05, "loss": 0.9644, "step": 30060 }, { "epoch": 2.63, "learning_rate": 3.686784872041226e-05, "loss": 0.8896, "step": 30070 }, { "epoch": 2.63, "learning_rate": 3.68634815267709e-05, "loss": 0.8064, "step": 30080 }, { "epoch": 2.63, "learning_rate": 3.685911433312953e-05, "loss": 0.9304, "step": 30090 }, { "epoch": 2.63, "learning_rate": 3.6854747139488166e-05, "loss": 0.7495, "step": 30100 }, { "epoch": 2.63, "learning_rate": 3.68503799458468e-05, "loss": 0.876, "step": 30110 }, { "epoch": 2.63, "learning_rate": 3.684601275220543e-05, "loss": 0.87, "step": 30120 }, { "epoch": 2.63, "learning_rate": 3.684164555856407e-05, "loss": 0.923, "step": 30130 }, { "epoch": 2.63, "learning_rate": 3.68372783649227e-05, "loss": 0.7467, "step": 30140 }, { "epoch": 2.63, "learning_rate": 3.683291117128134e-05, "loss": 0.9045, "step": 30150 }, { "epoch": 2.63, "learning_rate": 3.6828543977639965e-05, "loss": 0.8363, "step": 30160 }, { "epoch": 2.64, "learning_rate": 3.6824176783998605e-05, "loss": 0.8909, "step": 30170 }, { "epoch": 2.64, "learning_rate": 3.681980959035724e-05, "loss": 0.9214, "step": 30180 }, { "epoch": 2.64, "learning_rate": 3.681544239671587e-05, "loss": 0.8907, "step": 30190 }, { "epoch": 2.64, "learning_rate": 3.6811075203074505e-05, "loss": 0.9207, "step": 30200 }, { "epoch": 2.64, "learning_rate": 3.680670800943314e-05, "loss": 0.895, "step": 30210 }, { "epoch": 2.64, "learning_rate": 3.680234081579178e-05, "loss": 0.826, "step": 30220 }, { "epoch": 2.64, "learning_rate": 3.6797973622150404e-05, "loss": 0.95, "step": 30230 }, { "epoch": 2.64, "learning_rate": 3.6793606428509044e-05, "loss": 0.845, "step": 30240 }, { "epoch": 2.64, "learning_rate": 3.678923923486768e-05, "loss": 0.8591, "step": 30250 }, { "epoch": 2.64, "learning_rate": 3.678487204122631e-05, "loss": 0.8775, "step": 30260 }, { "epoch": 2.64, "learning_rate": 3.6780504847584944e-05, "loss": 1.0453, "step": 30270 }, { "epoch": 2.64, "learning_rate": 3.677613765394358e-05, "loss": 0.7653, "step": 30280 }, { "epoch": 2.65, "learning_rate": 3.677177046030221e-05, "loss": 0.9071, "step": 30290 }, { "epoch": 2.65, "learning_rate": 3.6767403266660844e-05, "loss": 0.8831, "step": 30300 }, { "epoch": 2.65, "learning_rate": 3.676303607301948e-05, "loss": 0.9214, "step": 30310 }, { "epoch": 2.65, "learning_rate": 3.675866887937811e-05, "loss": 0.7844, "step": 30320 }, { "epoch": 2.65, "learning_rate": 3.675430168573675e-05, "loss": 0.8768, "step": 30330 }, { "epoch": 2.65, "learning_rate": 3.674993449209538e-05, "loss": 0.9314, "step": 30340 }, { "epoch": 2.65, "learning_rate": 3.6745567298454016e-05, "loss": 0.8272, "step": 30350 }, { "epoch": 2.65, "learning_rate": 3.674120010481265e-05, "loss": 0.971, "step": 30360 }, { "epoch": 2.65, "learning_rate": 3.673683291117128e-05, "loss": 0.8493, "step": 30370 }, { "epoch": 2.65, "learning_rate": 3.6732465717529916e-05, "loss": 0.8179, "step": 30380 }, { "epoch": 2.65, "learning_rate": 3.672809852388855e-05, "loss": 1.0002, "step": 30390 }, { "epoch": 2.66, "learning_rate": 3.672373133024718e-05, "loss": 0.9528, "step": 30400 }, { "epoch": 2.66, "learning_rate": 3.6719364136605816e-05, "loss": 0.8391, "step": 30410 }, { "epoch": 2.66, "learning_rate": 3.6714996942964456e-05, "loss": 0.9359, "step": 30420 }, { "epoch": 2.66, "learning_rate": 3.671062974932309e-05, "loss": 0.7887, "step": 30430 }, { "epoch": 2.66, "learning_rate": 3.670626255568172e-05, "loss": 0.8466, "step": 30440 }, { "epoch": 2.66, "learning_rate": 3.6701895362040355e-05, "loss": 0.8466, "step": 30450 }, { "epoch": 2.66, "learning_rate": 3.669752816839899e-05, "loss": 0.7982, "step": 30460 }, { "epoch": 2.66, "learning_rate": 3.669316097475762e-05, "loss": 1.0767, "step": 30470 }, { "epoch": 2.66, "learning_rate": 3.6688793781116255e-05, "loss": 0.9009, "step": 30480 }, { "epoch": 2.66, "learning_rate": 3.668442658747489e-05, "loss": 0.9746, "step": 30490 }, { "epoch": 2.66, "learning_rate": 3.668005939383353e-05, "loss": 0.8384, "step": 30500 }, { "epoch": 2.66, "learning_rate": 3.6675692200192154e-05, "loss": 0.9489, "step": 30510 }, { "epoch": 2.67, "learning_rate": 3.6671325006550794e-05, "loss": 0.7793, "step": 30520 }, { "epoch": 2.67, "learning_rate": 3.666695781290943e-05, "loss": 0.966, "step": 30530 }, { "epoch": 2.67, "learning_rate": 3.666259061926806e-05, "loss": 0.8517, "step": 30540 }, { "epoch": 2.67, "learning_rate": 3.6658223425626694e-05, "loss": 0.8766, "step": 30550 }, { "epoch": 2.67, "learning_rate": 3.665385623198533e-05, "loss": 0.8056, "step": 30560 }, { "epoch": 2.67, "learning_rate": 3.664948903834396e-05, "loss": 0.9925, "step": 30570 }, { "epoch": 2.67, "learning_rate": 3.6645121844702594e-05, "loss": 0.9064, "step": 30580 }, { "epoch": 2.67, "learning_rate": 3.6640754651061234e-05, "loss": 0.9196, "step": 30590 }, { "epoch": 2.67, "learning_rate": 3.663638745741986e-05, "loss": 0.9677, "step": 30600 }, { "epoch": 2.67, "learning_rate": 3.66320202637785e-05, "loss": 0.8676, "step": 30610 }, { "epoch": 2.67, "learning_rate": 3.662765307013713e-05, "loss": 0.9462, "step": 30620 }, { "epoch": 2.68, "learning_rate": 3.6623285876495766e-05, "loss": 1.055, "step": 30630 }, { "epoch": 2.68, "learning_rate": 3.66189186828544e-05, "loss": 0.8715, "step": 30640 }, { "epoch": 2.68, "learning_rate": 3.661455148921303e-05, "loss": 0.9595, "step": 30650 }, { "epoch": 2.68, "learning_rate": 3.661018429557167e-05, "loss": 0.8803, "step": 30660 }, { "epoch": 2.68, "learning_rate": 3.66058171019303e-05, "loss": 0.8949, "step": 30670 }, { "epoch": 2.68, "learning_rate": 3.660144990828894e-05, "loss": 0.9004, "step": 30680 }, { "epoch": 2.68, "learning_rate": 3.6597082714647566e-05, "loss": 0.9795, "step": 30690 }, { "epoch": 2.68, "learning_rate": 3.6592715521006206e-05, "loss": 0.86, "step": 30700 }, { "epoch": 2.68, "learning_rate": 3.658834832736483e-05, "loss": 0.8744, "step": 30710 }, { "epoch": 2.68, "learning_rate": 3.658398113372347e-05, "loss": 0.8781, "step": 30720 }, { "epoch": 2.68, "learning_rate": 3.6579613940082105e-05, "loss": 0.987, "step": 30730 }, { "epoch": 2.68, "learning_rate": 3.657524674644074e-05, "loss": 0.9166, "step": 30740 }, { "epoch": 2.69, "learning_rate": 3.657087955279938e-05, "loss": 0.8625, "step": 30750 }, { "epoch": 2.69, "learning_rate": 3.6566512359158005e-05, "loss": 0.8649, "step": 30760 }, { "epoch": 2.69, "learning_rate": 3.6562145165516645e-05, "loss": 0.827, "step": 30770 }, { "epoch": 2.69, "learning_rate": 3.655777797187527e-05, "loss": 0.829, "step": 30780 }, { "epoch": 2.69, "learning_rate": 3.655341077823391e-05, "loss": 0.8834, "step": 30790 }, { "epoch": 2.69, "learning_rate": 3.654904358459254e-05, "loss": 0.9434, "step": 30800 }, { "epoch": 2.69, "learning_rate": 3.654467639095118e-05, "loss": 0.7786, "step": 30810 }, { "epoch": 2.69, "learning_rate": 3.654030919730981e-05, "loss": 0.8089, "step": 30820 }, { "epoch": 2.69, "learning_rate": 3.6535942003668444e-05, "loss": 0.9265, "step": 30830 }, { "epoch": 2.69, "learning_rate": 3.653157481002708e-05, "loss": 0.8696, "step": 30840 }, { "epoch": 2.69, "learning_rate": 3.652720761638571e-05, "loss": 0.9755, "step": 30850 }, { "epoch": 2.7, "learning_rate": 3.652284042274435e-05, "loss": 0.8557, "step": 30860 }, { "epoch": 2.7, "learning_rate": 3.651847322910298e-05, "loss": 0.8577, "step": 30870 }, { "epoch": 2.7, "learning_rate": 3.651410603546162e-05, "loss": 0.9624, "step": 30880 }, { "epoch": 2.7, "learning_rate": 3.650973884182024e-05, "loss": 0.7929, "step": 30890 }, { "epoch": 2.7, "learning_rate": 3.650537164817888e-05, "loss": 0.8877, "step": 30900 }, { "epoch": 2.7, "learning_rate": 3.6501004454537516e-05, "loss": 0.9239, "step": 30910 }, { "epoch": 2.7, "learning_rate": 3.649663726089615e-05, "loss": 0.804, "step": 30920 }, { "epoch": 2.7, "learning_rate": 3.649227006725478e-05, "loss": 0.8929, "step": 30930 }, { "epoch": 2.7, "learning_rate": 3.6487902873613416e-05, "loss": 0.9311, "step": 30940 }, { "epoch": 2.7, "learning_rate": 3.6483535679972056e-05, "loss": 0.9447, "step": 30950 }, { "epoch": 2.7, "learning_rate": 3.647916848633068e-05, "loss": 0.8262, "step": 30960 }, { "epoch": 2.71, "learning_rate": 3.647480129268932e-05, "loss": 0.8579, "step": 30970 }, { "epoch": 2.71, "learning_rate": 3.647043409904795e-05, "loss": 0.9707, "step": 30980 }, { "epoch": 2.71, "learning_rate": 3.646606690540659e-05, "loss": 0.9639, "step": 30990 }, { "epoch": 2.71, "learning_rate": 3.646169971176522e-05, "loss": 0.9303, "step": 31000 }, { "epoch": 2.71, "learning_rate": 3.6457332518123855e-05, "loss": 0.9284, "step": 31010 }, { "epoch": 2.71, "learning_rate": 3.645296532448249e-05, "loss": 0.812, "step": 31020 }, { "epoch": 2.71, "learning_rate": 3.644859813084112e-05, "loss": 0.8999, "step": 31030 }, { "epoch": 2.71, "learning_rate": 3.6444230937199755e-05, "loss": 0.7755, "step": 31040 }, { "epoch": 2.71, "learning_rate": 3.643986374355839e-05, "loss": 0.9287, "step": 31050 }, { "epoch": 2.71, "learning_rate": 3.643549654991703e-05, "loss": 0.8968, "step": 31060 }, { "epoch": 2.71, "learning_rate": 3.643112935627566e-05, "loss": 0.8468, "step": 31070 }, { "epoch": 2.71, "learning_rate": 3.6426762162634294e-05, "loss": 0.8325, "step": 31080 }, { "epoch": 2.72, "learning_rate": 3.642239496899293e-05, "loss": 0.8397, "step": 31090 }, { "epoch": 2.72, "learning_rate": 3.641802777535156e-05, "loss": 1.0066, "step": 31100 }, { "epoch": 2.72, "learning_rate": 3.6413660581710194e-05, "loss": 0.9081, "step": 31110 }, { "epoch": 2.72, "learning_rate": 3.640929338806883e-05, "loss": 0.8058, "step": 31120 }, { "epoch": 2.72, "learning_rate": 3.640492619442746e-05, "loss": 0.8724, "step": 31130 }, { "epoch": 2.72, "learning_rate": 3.6400559000786094e-05, "loss": 0.938, "step": 31140 }, { "epoch": 2.72, "learning_rate": 3.6396191807144734e-05, "loss": 0.811, "step": 31150 }, { "epoch": 2.72, "learning_rate": 3.639182461350337e-05, "loss": 0.8148, "step": 31160 }, { "epoch": 2.72, "learning_rate": 3.6387457419862e-05, "loss": 0.9617, "step": 31170 }, { "epoch": 2.72, "learning_rate": 3.638309022622063e-05, "loss": 0.8239, "step": 31180 }, { "epoch": 2.72, "learning_rate": 3.6378723032579266e-05, "loss": 0.8597, "step": 31190 }, { "epoch": 2.73, "learning_rate": 3.63743558389379e-05, "loss": 0.7862, "step": 31200 }, { "epoch": 2.73, "learning_rate": 3.636998864529653e-05, "loss": 1.0252, "step": 31210 }, { "epoch": 2.73, "learning_rate": 3.6365621451655166e-05, "loss": 0.8375, "step": 31220 }, { "epoch": 2.73, "learning_rate": 3.63612542580138e-05, "loss": 0.7994, "step": 31230 }, { "epoch": 2.73, "learning_rate": 3.635688706437243e-05, "loss": 0.8623, "step": 31240 }, { "epoch": 2.73, "learning_rate": 3.635251987073107e-05, "loss": 0.8096, "step": 31250 }, { "epoch": 2.73, "learning_rate": 3.6348152677089706e-05, "loss": 0.8325, "step": 31260 }, { "epoch": 2.73, "learning_rate": 3.634378548344834e-05, "loss": 0.9191, "step": 31270 }, { "epoch": 2.73, "learning_rate": 3.633941828980697e-05, "loss": 0.9055, "step": 31280 }, { "epoch": 2.73, "learning_rate": 3.6335051096165605e-05, "loss": 0.9339, "step": 31290 }, { "epoch": 2.73, "learning_rate": 3.633068390252424e-05, "loss": 0.8545, "step": 31300 }, { "epoch": 2.73, "learning_rate": 3.632631670888287e-05, "loss": 0.9295, "step": 31310 }, { "epoch": 2.74, "learning_rate": 3.632194951524151e-05, "loss": 0.857, "step": 31320 }, { "epoch": 2.74, "learning_rate": 3.631758232160014e-05, "loss": 0.9053, "step": 31330 }, { "epoch": 2.74, "learning_rate": 3.631321512795878e-05, "loss": 0.869, "step": 31340 }, { "epoch": 2.74, "learning_rate": 3.630884793431741e-05, "loss": 0.8556, "step": 31350 }, { "epoch": 2.74, "learning_rate": 3.6304480740676044e-05, "loss": 0.9302, "step": 31360 }, { "epoch": 2.74, "learning_rate": 3.630011354703468e-05, "loss": 0.9028, "step": 31370 }, { "epoch": 2.74, "learning_rate": 3.629574635339331e-05, "loss": 0.8506, "step": 31380 }, { "epoch": 2.74, "learning_rate": 3.6291379159751944e-05, "loss": 1.0644, "step": 31390 }, { "epoch": 2.74, "learning_rate": 3.628701196611058e-05, "loss": 0.8551, "step": 31400 }, { "epoch": 2.74, "learning_rate": 3.628264477246922e-05, "loss": 0.9015, "step": 31410 }, { "epoch": 2.74, "learning_rate": 3.6278277578827844e-05, "loss": 0.9056, "step": 31420 }, { "epoch": 2.75, "learning_rate": 3.6273910385186484e-05, "loss": 0.837, "step": 31430 }, { "epoch": 2.75, "learning_rate": 3.626954319154511e-05, "loss": 0.9594, "step": 31440 }, { "epoch": 2.75, "learning_rate": 3.626517599790375e-05, "loss": 0.9665, "step": 31450 }, { "epoch": 2.75, "learning_rate": 3.626080880426238e-05, "loss": 0.9836, "step": 31460 }, { "epoch": 2.75, "learning_rate": 3.6256441610621016e-05, "loss": 0.9962, "step": 31470 }, { "epoch": 2.75, "learning_rate": 3.6252074416979656e-05, "loss": 0.9267, "step": 31480 }, { "epoch": 2.75, "learning_rate": 3.624770722333828e-05, "loss": 0.9897, "step": 31490 }, { "epoch": 2.75, "learning_rate": 3.624334002969692e-05, "loss": 0.9022, "step": 31500 }, { "epoch": 2.75, "learning_rate": 3.623897283605555e-05, "loss": 0.8704, "step": 31510 }, { "epoch": 2.75, "learning_rate": 3.623460564241419e-05, "loss": 0.8266, "step": 31520 }, { "epoch": 2.75, "learning_rate": 3.6230238448772816e-05, "loss": 0.9212, "step": 31530 }, { "epoch": 2.75, "learning_rate": 3.6225871255131456e-05, "loss": 0.8737, "step": 31540 }, { "epoch": 2.76, "learning_rate": 3.622150406149009e-05, "loss": 0.8937, "step": 31550 }, { "epoch": 2.76, "learning_rate": 3.621713686784872e-05, "loss": 0.807, "step": 31560 }, { "epoch": 2.76, "learning_rate": 3.6212769674207355e-05, "loss": 0.8526, "step": 31570 }, { "epoch": 2.76, "learning_rate": 3.620840248056599e-05, "loss": 0.8936, "step": 31580 }, { "epoch": 2.76, "learning_rate": 3.620403528692463e-05, "loss": 0.9244, "step": 31590 }, { "epoch": 2.76, "learning_rate": 3.6199668093283255e-05, "loss": 1.0083, "step": 31600 }, { "epoch": 2.76, "learning_rate": 3.6195300899641895e-05, "loss": 0.9569, "step": 31610 }, { "epoch": 2.76, "learning_rate": 3.619093370600052e-05, "loss": 0.9787, "step": 31620 }, { "epoch": 2.76, "learning_rate": 3.618656651235916e-05, "loss": 1.012, "step": 31630 }, { "epoch": 2.76, "learning_rate": 3.618219931871779e-05, "loss": 0.889, "step": 31640 }, { "epoch": 2.76, "learning_rate": 3.617783212507643e-05, "loss": 0.8541, "step": 31650 }, { "epoch": 2.77, "learning_rate": 3.617346493143506e-05, "loss": 0.8598, "step": 31660 }, { "epoch": 2.77, "learning_rate": 3.6169097737793694e-05, "loss": 0.9854, "step": 31670 }, { "epoch": 2.77, "learning_rate": 3.6164730544152334e-05, "loss": 0.8487, "step": 31680 }, { "epoch": 2.77, "learning_rate": 3.616036335051096e-05, "loss": 0.9482, "step": 31690 }, { "epoch": 2.77, "learning_rate": 3.61559961568696e-05, "loss": 0.8416, "step": 31700 }, { "epoch": 2.77, "learning_rate": 3.615162896322823e-05, "loss": 0.9164, "step": 31710 }, { "epoch": 2.77, "learning_rate": 3.614726176958687e-05, "loss": 0.9078, "step": 31720 }, { "epoch": 2.77, "learning_rate": 3.61428945759455e-05, "loss": 0.9142, "step": 31730 }, { "epoch": 2.77, "learning_rate": 3.613852738230413e-05, "loss": 0.9093, "step": 31740 }, { "epoch": 2.77, "learning_rate": 3.6134160188662766e-05, "loss": 0.8964, "step": 31750 }, { "epoch": 2.77, "learning_rate": 3.61297929950214e-05, "loss": 0.8281, "step": 31760 }, { "epoch": 2.77, "learning_rate": 3.612542580138003e-05, "loss": 0.9545, "step": 31770 }, { "epoch": 2.78, "learning_rate": 3.6121058607738666e-05, "loss": 1.0251, "step": 31780 }, { "epoch": 2.78, "learning_rate": 3.6116691414097306e-05, "loss": 0.879, "step": 31790 }, { "epoch": 2.78, "learning_rate": 3.611232422045593e-05, "loss": 0.9087, "step": 31800 }, { "epoch": 2.78, "learning_rate": 3.610795702681457e-05, "loss": 0.9483, "step": 31810 }, { "epoch": 2.78, "learning_rate": 3.6103589833173206e-05, "loss": 0.8455, "step": 31820 }, { "epoch": 2.78, "learning_rate": 3.609922263953184e-05, "loss": 0.7958, "step": 31830 }, { "epoch": 2.78, "learning_rate": 3.609485544589047e-05, "loss": 0.9556, "step": 31840 }, { "epoch": 2.78, "learning_rate": 3.6090488252249105e-05, "loss": 0.9793, "step": 31850 }, { "epoch": 2.78, "learning_rate": 3.608612105860774e-05, "loss": 1.0086, "step": 31860 }, { "epoch": 2.78, "learning_rate": 3.608175386496637e-05, "loss": 0.8565, "step": 31870 }, { "epoch": 2.78, "learning_rate": 3.607738667132501e-05, "loss": 0.8732, "step": 31880 }, { "epoch": 2.79, "learning_rate": 3.6073019477683645e-05, "loss": 0.9144, "step": 31890 }, { "epoch": 2.79, "learning_rate": 3.606865228404228e-05, "loss": 0.7981, "step": 31900 }, { "epoch": 2.79, "learning_rate": 3.606428509040091e-05, "loss": 0.9067, "step": 31910 }, { "epoch": 2.79, "learning_rate": 3.6059917896759544e-05, "loss": 0.7604, "step": 31920 }, { "epoch": 2.79, "learning_rate": 3.605555070311818e-05, "loss": 0.9703, "step": 31930 }, { "epoch": 2.79, "learning_rate": 3.605118350947681e-05, "loss": 0.9444, "step": 31940 }, { "epoch": 2.79, "learning_rate": 3.6046816315835444e-05, "loss": 0.9026, "step": 31950 }, { "epoch": 2.79, "learning_rate": 3.604244912219408e-05, "loss": 0.8295, "step": 31960 }, { "epoch": 2.79, "learning_rate": 3.603808192855271e-05, "loss": 1.0002, "step": 31970 }, { "epoch": 2.79, "learning_rate": 3.603371473491135e-05, "loss": 0.9038, "step": 31980 }, { "epoch": 2.79, "learning_rate": 3.6029347541269984e-05, "loss": 0.9065, "step": 31990 }, { "epoch": 2.8, "learning_rate": 3.602498034762862e-05, "loss": 0.8089, "step": 32000 }, { "epoch": 2.8, "learning_rate": 3.602061315398725e-05, "loss": 0.8224, "step": 32010 }, { "epoch": 2.8, "learning_rate": 3.601624596034588e-05, "loss": 0.8463, "step": 32020 }, { "epoch": 2.8, "learning_rate": 3.6011878766704516e-05, "loss": 0.9144, "step": 32030 }, { "epoch": 2.8, "learning_rate": 3.600751157306315e-05, "loss": 0.8298, "step": 32040 }, { "epoch": 2.8, "learning_rate": 3.600314437942178e-05, "loss": 0.9097, "step": 32050 }, { "epoch": 2.8, "learning_rate": 3.5998777185780416e-05, "loss": 0.7905, "step": 32060 }, { "epoch": 2.8, "learning_rate": 3.5994409992139056e-05, "loss": 1.0139, "step": 32070 }, { "epoch": 2.8, "learning_rate": 3.599004279849769e-05, "loss": 0.933, "step": 32080 }, { "epoch": 2.8, "learning_rate": 3.598567560485632e-05, "loss": 0.947, "step": 32090 }, { "epoch": 2.8, "learning_rate": 3.5981308411214956e-05, "loss": 0.9221, "step": 32100 }, { "epoch": 2.8, "learning_rate": 3.597694121757359e-05, "loss": 0.948, "step": 32110 }, { "epoch": 2.81, "learning_rate": 3.597257402393222e-05, "loss": 0.8697, "step": 32120 }, { "epoch": 2.81, "learning_rate": 3.5968206830290855e-05, "loss": 0.9354, "step": 32130 }, { "epoch": 2.81, "learning_rate": 3.5963839636649495e-05, "loss": 0.9762, "step": 32140 }, { "epoch": 2.81, "learning_rate": 3.595947244300812e-05, "loss": 0.8958, "step": 32150 }, { "epoch": 2.81, "learning_rate": 3.595510524936676e-05, "loss": 0.8151, "step": 32160 }, { "epoch": 2.81, "learning_rate": 3.5950738055725395e-05, "loss": 0.9358, "step": 32170 }, { "epoch": 2.81, "learning_rate": 3.594637086208403e-05, "loss": 1.0005, "step": 32180 }, { "epoch": 2.81, "learning_rate": 3.594200366844266e-05, "loss": 1.0432, "step": 32190 }, { "epoch": 2.81, "learning_rate": 3.5937636474801294e-05, "loss": 0.8377, "step": 32200 }, { "epoch": 2.81, "learning_rate": 3.593326928115993e-05, "loss": 0.891, "step": 32210 }, { "epoch": 2.81, "learning_rate": 3.592890208751856e-05, "loss": 0.9114, "step": 32220 }, { "epoch": 2.82, "learning_rate": 3.59245348938772e-05, "loss": 0.967, "step": 32230 }, { "epoch": 2.82, "learning_rate": 3.592016770023583e-05, "loss": 0.949, "step": 32240 }, { "epoch": 2.82, "learning_rate": 3.591580050659447e-05, "loss": 0.9408, "step": 32250 }, { "epoch": 2.82, "learning_rate": 3.5911433312953094e-05, "loss": 0.9918, "step": 32260 }, { "epoch": 2.82, "learning_rate": 3.5907066119311734e-05, "loss": 0.9556, "step": 32270 }, { "epoch": 2.82, "learning_rate": 3.590269892567037e-05, "loss": 0.7735, "step": 32280 }, { "epoch": 2.82, "learning_rate": 3.5898331732029e-05, "loss": 0.9057, "step": 32290 }, { "epoch": 2.82, "learning_rate": 3.589396453838763e-05, "loss": 0.9906, "step": 32300 }, { "epoch": 2.82, "learning_rate": 3.5889597344746266e-05, "loss": 0.8508, "step": 32310 }, { "epoch": 2.82, "learning_rate": 3.5885230151104906e-05, "loss": 0.8245, "step": 32320 }, { "epoch": 2.82, "learning_rate": 3.588086295746353e-05, "loss": 0.8635, "step": 32330 }, { "epoch": 2.82, "learning_rate": 3.587649576382217e-05, "loss": 0.9695, "step": 32340 }, { "epoch": 2.83, "learning_rate": 3.58721285701808e-05, "loss": 0.9106, "step": 32350 }, { "epoch": 2.83, "learning_rate": 3.586776137653944e-05, "loss": 0.8739, "step": 32360 }, { "epoch": 2.83, "learning_rate": 3.586339418289807e-05, "loss": 0.9221, "step": 32370 }, { "epoch": 2.83, "learning_rate": 3.5859026989256706e-05, "loss": 1.082, "step": 32380 }, { "epoch": 2.83, "learning_rate": 3.585465979561534e-05, "loss": 0.7639, "step": 32390 }, { "epoch": 2.83, "learning_rate": 3.585029260197397e-05, "loss": 0.9124, "step": 32400 }, { "epoch": 2.83, "learning_rate": 3.584592540833261e-05, "loss": 0.9329, "step": 32410 }, { "epoch": 2.83, "learning_rate": 3.584155821469124e-05, "loss": 0.8865, "step": 32420 }, { "epoch": 2.83, "learning_rate": 3.583719102104988e-05, "loss": 0.8719, "step": 32430 }, { "epoch": 2.83, "learning_rate": 3.5832823827408505e-05, "loss": 0.9509, "step": 32440 }, { "epoch": 2.83, "learning_rate": 3.5828456633767145e-05, "loss": 0.8419, "step": 32450 }, { "epoch": 2.84, "learning_rate": 3.582408944012577e-05, "loss": 0.8648, "step": 32460 }, { "epoch": 2.84, "learning_rate": 3.581972224648441e-05, "loss": 0.8786, "step": 32470 }, { "epoch": 2.84, "learning_rate": 3.5815355052843044e-05, "loss": 0.7561, "step": 32480 }, { "epoch": 2.84, "learning_rate": 3.581098785920168e-05, "loss": 0.7937, "step": 32490 }, { "epoch": 2.84, "learning_rate": 3.580662066556032e-05, "loss": 0.8721, "step": 32500 }, { "epoch": 2.84, "learning_rate": 3.5802253471918944e-05, "loss": 0.9386, "step": 32510 }, { "epoch": 2.84, "learning_rate": 3.5797886278277584e-05, "loss": 0.8892, "step": 32520 }, { "epoch": 2.84, "learning_rate": 3.579351908463621e-05, "loss": 0.9537, "step": 32530 }, { "epoch": 2.84, "learning_rate": 3.578915189099485e-05, "loss": 0.9444, "step": 32540 }, { "epoch": 2.84, "learning_rate": 3.5784784697353484e-05, "loss": 0.9662, "step": 32550 }, { "epoch": 2.84, "learning_rate": 3.578041750371212e-05, "loss": 0.8605, "step": 32560 }, { "epoch": 2.84, "learning_rate": 3.577605031007075e-05, "loss": 0.8425, "step": 32570 }, { "epoch": 2.85, "learning_rate": 3.577168311642938e-05, "loss": 0.8483, "step": 32580 }, { "epoch": 2.85, "learning_rate": 3.5767315922788016e-05, "loss": 0.7909, "step": 32590 }, { "epoch": 2.85, "learning_rate": 3.576294872914665e-05, "loss": 0.8687, "step": 32600 }, { "epoch": 2.85, "learning_rate": 3.575858153550529e-05, "loss": 0.8529, "step": 32610 }, { "epoch": 2.85, "learning_rate": 3.5754214341863916e-05, "loss": 0.9204, "step": 32620 }, { "epoch": 2.85, "learning_rate": 3.5749847148222556e-05, "loss": 0.9534, "step": 32630 }, { "epoch": 2.85, "learning_rate": 3.574547995458119e-05, "loss": 0.8468, "step": 32640 }, { "epoch": 2.85, "learning_rate": 3.574111276093982e-05, "loss": 0.8563, "step": 32650 }, { "epoch": 2.85, "learning_rate": 3.5736745567298456e-05, "loss": 0.9261, "step": 32660 }, { "epoch": 2.85, "learning_rate": 3.573237837365709e-05, "loss": 0.9533, "step": 32670 }, { "epoch": 2.85, "learning_rate": 3.572801118001572e-05, "loss": 0.8493, "step": 32680 }, { "epoch": 2.86, "learning_rate": 3.5723643986374355e-05, "loss": 0.8436, "step": 32690 }, { "epoch": 2.86, "learning_rate": 3.5719276792732995e-05, "loss": 0.9649, "step": 32700 }, { "epoch": 2.86, "learning_rate": 3.571490959909162e-05, "loss": 0.7837, "step": 32710 }, { "epoch": 2.86, "learning_rate": 3.571054240545026e-05, "loss": 1.0539, "step": 32720 }, { "epoch": 2.86, "learning_rate": 3.5706175211808895e-05, "loss": 0.8843, "step": 32730 }, { "epoch": 2.86, "learning_rate": 3.570180801816753e-05, "loss": 0.8199, "step": 32740 }, { "epoch": 2.86, "learning_rate": 3.569744082452616e-05, "loss": 0.7991, "step": 32750 }, { "epoch": 2.86, "learning_rate": 3.5693073630884794e-05, "loss": 0.9486, "step": 32760 }, { "epoch": 2.86, "learning_rate": 3.568870643724343e-05, "loss": 0.8535, "step": 32770 }, { "epoch": 2.86, "learning_rate": 3.568433924360206e-05, "loss": 0.9214, "step": 32780 }, { "epoch": 2.86, "learning_rate": 3.5679972049960694e-05, "loss": 0.9739, "step": 32790 }, { "epoch": 2.86, "learning_rate": 3.5675604856319334e-05, "loss": 0.9556, "step": 32800 }, { "epoch": 2.87, "learning_rate": 3.567123766267797e-05, "loss": 0.9765, "step": 32810 }, { "epoch": 2.87, "learning_rate": 3.56668704690366e-05, "loss": 0.9093, "step": 32820 }, { "epoch": 2.87, "learning_rate": 3.5662503275395234e-05, "loss": 0.972, "step": 32830 }, { "epoch": 2.87, "learning_rate": 3.565813608175387e-05, "loss": 0.8315, "step": 32840 }, { "epoch": 2.87, "learning_rate": 3.56537688881125e-05, "loss": 0.8443, "step": 32850 }, { "epoch": 2.87, "learning_rate": 3.564940169447113e-05, "loss": 0.9462, "step": 32860 }, { "epoch": 2.87, "learning_rate": 3.5645034500829766e-05, "loss": 0.8609, "step": 32870 }, { "epoch": 2.87, "learning_rate": 3.56406673071884e-05, "loss": 0.8787, "step": 32880 }, { "epoch": 2.87, "learning_rate": 3.563630011354704e-05, "loss": 0.9766, "step": 32890 }, { "epoch": 2.87, "learning_rate": 3.563193291990567e-05, "loss": 1.0129, "step": 32900 }, { "epoch": 2.87, "learning_rate": 3.5627565726264306e-05, "loss": 1.0724, "step": 32910 }, { "epoch": 2.88, "learning_rate": 3.562319853262294e-05, "loss": 0.8128, "step": 32920 }, { "epoch": 2.88, "learning_rate": 3.561883133898157e-05, "loss": 0.8227, "step": 32930 }, { "epoch": 2.88, "learning_rate": 3.5614464145340206e-05, "loss": 0.785, "step": 32940 }, { "epoch": 2.88, "learning_rate": 3.561009695169884e-05, "loss": 0.9274, "step": 32950 }, { "epoch": 2.88, "learning_rate": 3.560572975805748e-05, "loss": 0.8945, "step": 32960 }, { "epoch": 2.88, "learning_rate": 3.5601362564416105e-05, "loss": 0.8248, "step": 32970 }, { "epoch": 2.88, "learning_rate": 3.5596995370774745e-05, "loss": 0.9042, "step": 32980 }, { "epoch": 2.88, "learning_rate": 3.559262817713337e-05, "loss": 0.91, "step": 32990 }, { "epoch": 2.88, "learning_rate": 3.558826098349201e-05, "loss": 0.8769, "step": 33000 }, { "epoch": 2.88, "learning_rate": 3.5583893789850645e-05, "loss": 0.9262, "step": 33010 }, { "epoch": 2.88, "learning_rate": 3.557952659620928e-05, "loss": 0.9182, "step": 33020 }, { "epoch": 2.88, "learning_rate": 3.557515940256791e-05, "loss": 0.8805, "step": 33030 }, { "epoch": 2.89, "learning_rate": 3.5570792208926544e-05, "loss": 0.8361, "step": 33040 }, { "epoch": 2.89, "learning_rate": 3.5566425015285184e-05, "loss": 0.7815, "step": 33050 }, { "epoch": 2.89, "learning_rate": 3.556205782164381e-05, "loss": 0.8394, "step": 33060 }, { "epoch": 2.89, "learning_rate": 3.555769062800245e-05, "loss": 0.848, "step": 33070 }, { "epoch": 2.89, "learning_rate": 3.555332343436108e-05, "loss": 0.9131, "step": 33080 }, { "epoch": 2.89, "learning_rate": 3.554895624071972e-05, "loss": 0.8752, "step": 33090 }, { "epoch": 2.89, "learning_rate": 3.554458904707835e-05, "loss": 0.9267, "step": 33100 }, { "epoch": 2.89, "learning_rate": 3.5540221853436984e-05, "loss": 0.9302, "step": 33110 }, { "epoch": 2.89, "learning_rate": 3.553585465979562e-05, "loss": 0.9191, "step": 33120 }, { "epoch": 2.89, "learning_rate": 3.553148746615425e-05, "loss": 0.8011, "step": 33130 }, { "epoch": 2.89, "learning_rate": 3.552712027251289e-05, "loss": 0.8981, "step": 33140 }, { "epoch": 2.9, "learning_rate": 3.5522753078871516e-05, "loss": 0.8593, "step": 33150 }, { "epoch": 2.9, "learning_rate": 3.5518385885230156e-05, "loss": 0.8256, "step": 33160 }, { "epoch": 2.9, "learning_rate": 3.551401869158878e-05, "loss": 0.8501, "step": 33170 }, { "epoch": 2.9, "learning_rate": 3.550965149794742e-05, "loss": 0.8409, "step": 33180 }, { "epoch": 2.9, "learning_rate": 3.550528430430605e-05, "loss": 0.8818, "step": 33190 }, { "epoch": 2.9, "learning_rate": 3.550091711066469e-05, "loss": 0.8558, "step": 33200 }, { "epoch": 2.9, "learning_rate": 3.549654991702332e-05, "loss": 0.8975, "step": 33210 }, { "epoch": 2.9, "learning_rate": 3.5492182723381956e-05, "loss": 0.8991, "step": 33220 }, { "epoch": 2.9, "learning_rate": 3.5487815529740596e-05, "loss": 0.9684, "step": 33230 }, { "epoch": 2.9, "learning_rate": 3.548344833609922e-05, "loss": 0.8138, "step": 33240 }, { "epoch": 2.9, "learning_rate": 3.547908114245786e-05, "loss": 0.884, "step": 33250 }, { "epoch": 2.91, "learning_rate": 3.547471394881649e-05, "loss": 0.766, "step": 33260 }, { "epoch": 2.91, "learning_rate": 3.547034675517513e-05, "loss": 0.9252, "step": 33270 }, { "epoch": 2.91, "learning_rate": 3.5465979561533755e-05, "loss": 0.856, "step": 33280 }, { "epoch": 2.91, "learning_rate": 3.5461612367892395e-05, "loss": 0.9005, "step": 33290 }, { "epoch": 2.91, "learning_rate": 3.545724517425103e-05, "loss": 0.835, "step": 33300 }, { "epoch": 2.91, "learning_rate": 3.545287798060966e-05, "loss": 0.8151, "step": 33310 }, { "epoch": 2.91, "learning_rate": 3.5448510786968294e-05, "loss": 0.8401, "step": 33320 }, { "epoch": 2.91, "learning_rate": 3.544414359332693e-05, "loss": 1.054, "step": 33330 }, { "epoch": 2.91, "learning_rate": 3.543977639968557e-05, "loss": 0.7953, "step": 33340 }, { "epoch": 2.91, "learning_rate": 3.5435409206044194e-05, "loss": 0.8432, "step": 33350 }, { "epoch": 2.91, "learning_rate": 3.5431042012402834e-05, "loss": 0.8792, "step": 33360 }, { "epoch": 2.91, "learning_rate": 3.542667481876147e-05, "loss": 0.8561, "step": 33370 }, { "epoch": 2.92, "learning_rate": 3.54223076251201e-05, "loss": 0.8802, "step": 33380 }, { "epoch": 2.92, "learning_rate": 3.5417940431478734e-05, "loss": 0.7915, "step": 33390 }, { "epoch": 2.92, "learning_rate": 3.541357323783737e-05, "loss": 0.77, "step": 33400 }, { "epoch": 2.92, "learning_rate": 3.5409206044196e-05, "loss": 1.0202, "step": 33410 }, { "epoch": 2.92, "learning_rate": 3.540483885055463e-05, "loss": 0.8132, "step": 33420 }, { "epoch": 2.92, "learning_rate": 3.540047165691327e-05, "loss": 0.8365, "step": 33430 }, { "epoch": 2.92, "learning_rate": 3.53961044632719e-05, "loss": 0.8153, "step": 33440 }, { "epoch": 2.92, "learning_rate": 3.539173726963054e-05, "loss": 0.8786, "step": 33450 }, { "epoch": 2.92, "learning_rate": 3.538737007598917e-05, "loss": 0.9782, "step": 33460 }, { "epoch": 2.92, "learning_rate": 3.5383002882347806e-05, "loss": 0.9631, "step": 33470 }, { "epoch": 2.92, "learning_rate": 3.537863568870644e-05, "loss": 0.8552, "step": 33480 }, { "epoch": 2.93, "learning_rate": 3.537426849506507e-05, "loss": 0.8429, "step": 33490 }, { "epoch": 2.93, "learning_rate": 3.5369901301423706e-05, "loss": 0.8336, "step": 33500 }, { "epoch": 2.93, "learning_rate": 3.536553410778234e-05, "loss": 0.8604, "step": 33510 }, { "epoch": 2.93, "learning_rate": 3.536116691414097e-05, "loss": 0.9319, "step": 33520 }, { "epoch": 2.93, "learning_rate": 3.5356799720499605e-05, "loss": 0.9256, "step": 33530 }, { "epoch": 2.93, "learning_rate": 3.5352432526858245e-05, "loss": 0.8423, "step": 33540 }, { "epoch": 2.93, "learning_rate": 3.534806533321688e-05, "loss": 0.9648, "step": 33550 }, { "epoch": 2.93, "learning_rate": 3.534369813957551e-05, "loss": 1.0002, "step": 33560 }, { "epoch": 2.93, "learning_rate": 3.5339330945934145e-05, "loss": 0.8719, "step": 33570 }, { "epoch": 2.93, "learning_rate": 3.533496375229278e-05, "loss": 0.8496, "step": 33580 }, { "epoch": 2.93, "learning_rate": 3.533059655865141e-05, "loss": 0.8903, "step": 33590 }, { "epoch": 2.93, "learning_rate": 3.5326229365010044e-05, "loss": 0.8984, "step": 33600 }, { "epoch": 2.94, "learning_rate": 3.532186217136868e-05, "loss": 0.9814, "step": 33610 }, { "epoch": 2.94, "learning_rate": 3.531749497772732e-05, "loss": 0.8566, "step": 33620 }, { "epoch": 2.94, "learning_rate": 3.531312778408595e-05, "loss": 0.9102, "step": 33630 }, { "epoch": 2.94, "learning_rate": 3.5308760590444584e-05, "loss": 0.9665, "step": 33640 }, { "epoch": 2.94, "learning_rate": 3.530439339680322e-05, "loss": 0.8606, "step": 33650 }, { "epoch": 2.94, "learning_rate": 3.530002620316185e-05, "loss": 0.9535, "step": 33660 }, { "epoch": 2.94, "learning_rate": 3.5295659009520484e-05, "loss": 0.9405, "step": 33670 }, { "epoch": 2.94, "learning_rate": 3.529129181587912e-05, "loss": 0.893, "step": 33680 }, { "epoch": 2.94, "learning_rate": 3.528692462223775e-05, "loss": 1.0814, "step": 33690 }, { "epoch": 2.94, "learning_rate": 3.528255742859638e-05, "loss": 0.7987, "step": 33700 }, { "epoch": 2.94, "learning_rate": 3.527819023495502e-05, "loss": 0.8133, "step": 33710 }, { "epoch": 2.95, "learning_rate": 3.527382304131365e-05, "loss": 0.7475, "step": 33720 }, { "epoch": 2.95, "learning_rate": 3.526945584767229e-05, "loss": 0.9253, "step": 33730 }, { "epoch": 2.95, "learning_rate": 3.526508865403092e-05, "loss": 0.8634, "step": 33740 }, { "epoch": 2.95, "learning_rate": 3.5260721460389556e-05, "loss": 0.9544, "step": 33750 }, { "epoch": 2.95, "learning_rate": 3.525635426674819e-05, "loss": 0.8355, "step": 33760 }, { "epoch": 2.95, "learning_rate": 3.525198707310682e-05, "loss": 0.7776, "step": 33770 }, { "epoch": 2.95, "learning_rate": 3.524761987946546e-05, "loss": 0.8795, "step": 33780 }, { "epoch": 2.95, "learning_rate": 3.524325268582409e-05, "loss": 0.981, "step": 33790 }, { "epoch": 2.95, "learning_rate": 3.523888549218273e-05, "loss": 0.8933, "step": 33800 }, { "epoch": 2.95, "learning_rate": 3.5234518298541355e-05, "loss": 0.8578, "step": 33810 }, { "epoch": 2.95, "learning_rate": 3.5230151104899995e-05, "loss": 0.8661, "step": 33820 }, { "epoch": 2.95, "learning_rate": 3.522578391125863e-05, "loss": 0.8563, "step": 33830 }, { "epoch": 2.96, "learning_rate": 3.522141671761726e-05, "loss": 0.9361, "step": 33840 }, { "epoch": 2.96, "learning_rate": 3.5217049523975895e-05, "loss": 0.9242, "step": 33850 }, { "epoch": 2.96, "learning_rate": 3.521268233033453e-05, "loss": 0.9063, "step": 33860 }, { "epoch": 2.96, "learning_rate": 3.520831513669317e-05, "loss": 1.0139, "step": 33870 }, { "epoch": 2.96, "learning_rate": 3.5203947943051795e-05, "loss": 0.7928, "step": 33880 }, { "epoch": 2.96, "learning_rate": 3.5199580749410434e-05, "loss": 0.8645, "step": 33890 }, { "epoch": 2.96, "learning_rate": 3.519521355576906e-05, "loss": 0.8043, "step": 33900 }, { "epoch": 2.96, "learning_rate": 3.51908463621277e-05, "loss": 0.8156, "step": 33910 }, { "epoch": 2.96, "learning_rate": 3.518647916848633e-05, "loss": 0.9783, "step": 33920 }, { "epoch": 2.96, "learning_rate": 3.518211197484497e-05, "loss": 0.8923, "step": 33930 }, { "epoch": 2.96, "learning_rate": 3.51777447812036e-05, "loss": 0.8076, "step": 33940 }, { "epoch": 2.97, "learning_rate": 3.5173377587562234e-05, "loss": 0.8826, "step": 33950 }, { "epoch": 2.97, "learning_rate": 3.5169010393920874e-05, "loss": 0.9647, "step": 33960 }, { "epoch": 2.97, "learning_rate": 3.51646432002795e-05, "loss": 0.9384, "step": 33970 }, { "epoch": 2.97, "learning_rate": 3.516027600663814e-05, "loss": 0.8704, "step": 33980 }, { "epoch": 2.97, "learning_rate": 3.5155908812996767e-05, "loss": 0.8816, "step": 33990 }, { "epoch": 2.97, "learning_rate": 3.5151541619355406e-05, "loss": 0.9729, "step": 34000 }, { "epoch": 2.97, "learning_rate": 3.514717442571403e-05, "loss": 0.8491, "step": 34010 }, { "epoch": 2.97, "learning_rate": 3.514280723207267e-05, "loss": 0.8071, "step": 34020 }, { "epoch": 2.97, "learning_rate": 3.5138440038431306e-05, "loss": 0.8743, "step": 34030 }, { "epoch": 2.97, "learning_rate": 3.513407284478994e-05, "loss": 0.7653, "step": 34040 }, { "epoch": 2.97, "learning_rate": 3.512970565114857e-05, "loss": 0.9276, "step": 34050 }, { "epoch": 2.97, "learning_rate": 3.5125338457507206e-05, "loss": 0.8856, "step": 34060 }, { "epoch": 2.98, "learning_rate": 3.5120971263865846e-05, "loss": 0.8502, "step": 34070 }, { "epoch": 2.98, "learning_rate": 3.511660407022447e-05, "loss": 0.775, "step": 34080 }, { "epoch": 2.98, "learning_rate": 3.511223687658311e-05, "loss": 0.9047, "step": 34090 }, { "epoch": 2.98, "learning_rate": 3.510786968294174e-05, "loss": 0.88, "step": 34100 }, { "epoch": 2.98, "learning_rate": 3.510350248930038e-05, "loss": 0.8273, "step": 34110 }, { "epoch": 2.98, "learning_rate": 3.509913529565901e-05, "loss": 0.7624, "step": 34120 }, { "epoch": 2.98, "learning_rate": 3.5094768102017645e-05, "loss": 0.8721, "step": 34130 }, { "epoch": 2.98, "learning_rate": 3.509040090837628e-05, "loss": 0.8909, "step": 34140 }, { "epoch": 2.98, "learning_rate": 3.508603371473491e-05, "loss": 0.8245, "step": 34150 }, { "epoch": 2.98, "learning_rate": 3.508166652109355e-05, "loss": 0.8409, "step": 34160 }, { "epoch": 2.98, "learning_rate": 3.507729932745218e-05, "loss": 0.7988, "step": 34170 }, { "epoch": 2.99, "learning_rate": 3.507293213381082e-05, "loss": 0.8641, "step": 34180 }, { "epoch": 2.99, "learning_rate": 3.5068564940169444e-05, "loss": 0.8359, "step": 34190 }, { "epoch": 2.99, "learning_rate": 3.5064197746528084e-05, "loss": 0.8981, "step": 34200 }, { "epoch": 2.99, "learning_rate": 3.505983055288672e-05, "loss": 0.8315, "step": 34210 }, { "epoch": 2.99, "learning_rate": 3.505546335924535e-05, "loss": 0.8974, "step": 34220 }, { "epoch": 2.99, "learning_rate": 3.5051096165603984e-05, "loss": 0.9257, "step": 34230 }, { "epoch": 2.99, "learning_rate": 3.504672897196262e-05, "loss": 0.948, "step": 34240 }, { "epoch": 2.99, "learning_rate": 3.504236177832125e-05, "loss": 0.8309, "step": 34250 }, { "epoch": 2.99, "learning_rate": 3.503799458467988e-05, "loss": 0.9047, "step": 34260 }, { "epoch": 2.99, "learning_rate": 3.503362739103852e-05, "loss": 0.8787, "step": 34270 }, { "epoch": 2.99, "learning_rate": 3.5029260197397157e-05, "loss": 0.9212, "step": 34280 }, { "epoch": 3.0, "learning_rate": 3.502489300375579e-05, "loss": 0.9506, "step": 34290 }, { "epoch": 3.0, "learning_rate": 3.502052581011442e-05, "loss": 0.8778, "step": 34300 }, { "epoch": 3.0, "learning_rate": 3.5016158616473056e-05, "loss": 1.0212, "step": 34310 }, { "epoch": 3.0, "learning_rate": 3.501179142283169e-05, "loss": 0.9111, "step": 34320 }, { "epoch": 3.0, "learning_rate": 3.500742422919032e-05, "loss": 0.9281, "step": 34330 }, { "epoch": 3.0, "learning_rate": 3.5003057035548956e-05, "loss": 0.8172, "step": 34340 }, { "epoch": 3.0, "learning_rate": 3.499868984190759e-05, "loss": 1.0171, "step": 34350 }, { "epoch": 3.0, "learning_rate": 3.499432264826623e-05, "loss": 0.938, "step": 34360 }, { "epoch": 3.0, "learning_rate": 3.498995545462486e-05, "loss": 0.8832, "step": 34370 }, { "epoch": 3.0, "learning_rate": 3.4985588260983495e-05, "loss": 0.9279, "step": 34380 }, { "epoch": 3.0, "learning_rate": 3.498122106734213e-05, "loss": 0.7592, "step": 34390 }, { "epoch": 3.0, "learning_rate": 3.497685387370076e-05, "loss": 0.9636, "step": 34400 }, { "epoch": 3.01, "learning_rate": 3.4972486680059395e-05, "loss": 0.7962, "step": 34410 }, { "epoch": 3.01, "learning_rate": 3.496811948641803e-05, "loss": 0.9219, "step": 34420 }, { "epoch": 3.01, "learning_rate": 3.496375229277666e-05, "loss": 0.8833, "step": 34430 }, { "epoch": 3.01, "learning_rate": 3.49593850991353e-05, "loss": 0.7864, "step": 34440 }, { "epoch": 3.01, "learning_rate": 3.495501790549393e-05, "loss": 0.9062, "step": 34450 }, { "epoch": 3.01, "learning_rate": 3.495065071185257e-05, "loss": 0.7872, "step": 34460 }, { "epoch": 3.01, "learning_rate": 3.49462835182112e-05, "loss": 0.7374, "step": 34470 }, { "epoch": 3.01, "learning_rate": 3.4941916324569834e-05, "loss": 0.8927, "step": 34480 }, { "epoch": 3.01, "learning_rate": 3.493754913092847e-05, "loss": 1.083, "step": 34490 }, { "epoch": 3.01, "learning_rate": 3.49331819372871e-05, "loss": 0.9656, "step": 34500 }, { "epoch": 3.01, "learning_rate": 3.4928814743645734e-05, "loss": 0.9358, "step": 34510 }, { "epoch": 3.02, "learning_rate": 3.492444755000437e-05, "loss": 0.9888, "step": 34520 }, { "epoch": 3.02, "learning_rate": 3.492008035636301e-05, "loss": 0.972, "step": 34530 }, { "epoch": 3.02, "learning_rate": 3.491571316272163e-05, "loss": 0.8618, "step": 34540 }, { "epoch": 3.02, "learning_rate": 3.491134596908027e-05, "loss": 0.929, "step": 34550 }, { "epoch": 3.02, "learning_rate": 3.4906978775438907e-05, "loss": 0.8937, "step": 34560 }, { "epoch": 3.02, "learning_rate": 3.490261158179754e-05, "loss": 0.8863, "step": 34570 }, { "epoch": 3.02, "learning_rate": 3.489824438815617e-05, "loss": 0.9848, "step": 34580 }, { "epoch": 3.02, "learning_rate": 3.4893877194514806e-05, "loss": 0.9367, "step": 34590 }, { "epoch": 3.02, "learning_rate": 3.488951000087344e-05, "loss": 1.0042, "step": 34600 }, { "epoch": 3.02, "learning_rate": 3.488514280723207e-05, "loss": 0.9252, "step": 34610 }, { "epoch": 3.02, "learning_rate": 3.488077561359071e-05, "loss": 0.9834, "step": 34620 }, { "epoch": 3.02, "learning_rate": 3.487640841994934e-05, "loss": 0.7942, "step": 34630 }, { "epoch": 3.03, "learning_rate": 3.487204122630798e-05, "loss": 0.8642, "step": 34640 }, { "epoch": 3.03, "learning_rate": 3.4867674032666605e-05, "loss": 0.8466, "step": 34650 }, { "epoch": 3.03, "learning_rate": 3.4863306839025245e-05, "loss": 0.8247, "step": 34660 }, { "epoch": 3.03, "learning_rate": 3.485893964538388e-05, "loss": 0.94, "step": 34670 }, { "epoch": 3.03, "learning_rate": 3.485457245174251e-05, "loss": 0.8429, "step": 34680 }, { "epoch": 3.03, "learning_rate": 3.485020525810115e-05, "loss": 0.8034, "step": 34690 }, { "epoch": 3.03, "learning_rate": 3.484583806445978e-05, "loss": 0.8194, "step": 34700 }, { "epoch": 3.03, "learning_rate": 3.484147087081842e-05, "loss": 0.9079, "step": 34710 }, { "epoch": 3.03, "learning_rate": 3.4837103677177045e-05, "loss": 0.8624, "step": 34720 }, { "epoch": 3.03, "learning_rate": 3.4832736483535685e-05, "loss": 0.9443, "step": 34730 }, { "epoch": 3.03, "learning_rate": 3.482836928989431e-05, "loss": 0.8354, "step": 34740 }, { "epoch": 3.04, "learning_rate": 3.482400209625295e-05, "loss": 0.8489, "step": 34750 }, { "epoch": 3.04, "learning_rate": 3.4819634902611584e-05, "loss": 0.8837, "step": 34760 }, { "epoch": 3.04, "learning_rate": 3.481526770897022e-05, "loss": 0.8671, "step": 34770 }, { "epoch": 3.04, "learning_rate": 3.481090051532885e-05, "loss": 0.8916, "step": 34780 }, { "epoch": 3.04, "learning_rate": 3.4806533321687484e-05, "loss": 0.994, "step": 34790 }, { "epoch": 3.04, "learning_rate": 3.4802166128046124e-05, "loss": 0.9229, "step": 34800 }, { "epoch": 3.04, "learning_rate": 3.479779893440475e-05, "loss": 0.9352, "step": 34810 }, { "epoch": 3.04, "learning_rate": 3.479343174076339e-05, "loss": 0.8805, "step": 34820 }, { "epoch": 3.04, "learning_rate": 3.4789064547122017e-05, "loss": 0.8819, "step": 34830 }, { "epoch": 3.04, "learning_rate": 3.4784697353480657e-05, "loss": 0.8586, "step": 34840 }, { "epoch": 3.04, "learning_rate": 3.478033015983929e-05, "loss": 0.9049, "step": 34850 }, { "epoch": 3.04, "learning_rate": 3.477596296619792e-05, "loss": 1.0219, "step": 34860 }, { "epoch": 3.05, "learning_rate": 3.4771595772556556e-05, "loss": 0.9363, "step": 34870 }, { "epoch": 3.05, "learning_rate": 3.476722857891519e-05, "loss": 0.8157, "step": 34880 }, { "epoch": 3.05, "learning_rate": 3.476286138527383e-05, "loss": 0.783, "step": 34890 }, { "epoch": 3.05, "learning_rate": 3.4758494191632456e-05, "loss": 0.8574, "step": 34900 }, { "epoch": 3.05, "learning_rate": 3.4754126997991096e-05, "loss": 0.9588, "step": 34910 }, { "epoch": 3.05, "learning_rate": 3.474975980434972e-05, "loss": 0.8942, "step": 34920 }, { "epoch": 3.05, "learning_rate": 3.474539261070836e-05, "loss": 0.9236, "step": 34930 }, { "epoch": 3.05, "learning_rate": 3.4741025417066995e-05, "loss": 1.0235, "step": 34940 }, { "epoch": 3.05, "learning_rate": 3.473665822342563e-05, "loss": 0.6991, "step": 34950 }, { "epoch": 3.05, "learning_rate": 3.473229102978426e-05, "loss": 0.8242, "step": 34960 }, { "epoch": 3.05, "learning_rate": 3.4727923836142895e-05, "loss": 0.8821, "step": 34970 }, { "epoch": 3.06, "learning_rate": 3.472355664250153e-05, "loss": 0.7784, "step": 34980 }, { "epoch": 3.06, "learning_rate": 3.471918944886016e-05, "loss": 0.9724, "step": 34990 }, { "epoch": 3.06, "learning_rate": 3.47148222552188e-05, "loss": 0.8615, "step": 35000 }, { "epoch": 3.06, "eval_accuracy": 0.5885820968851332, "eval_loss": 0.882483720779419, "eval_runtime": 84.0427, "eval_samples_per_second": 121.093, "eval_steps_per_second": 15.147, "step": 35000 }, { "epoch": 3.06, "learning_rate": 3.471045506157743e-05, "loss": 1.0345, "step": 35010 }, { "epoch": 3.06, "learning_rate": 3.470608786793607e-05, "loss": 0.7732, "step": 35020 }, { "epoch": 3.06, "learning_rate": 3.47017206742947e-05, "loss": 0.7744, "step": 35030 }, { "epoch": 3.06, "learning_rate": 3.4697353480653334e-05, "loss": 1.058, "step": 35040 }, { "epoch": 3.06, "learning_rate": 3.469298628701197e-05, "loss": 0.8635, "step": 35050 }, { "epoch": 3.06, "learning_rate": 3.46886190933706e-05, "loss": 0.7907, "step": 35060 }, { "epoch": 3.06, "learning_rate": 3.4684251899729234e-05, "loss": 0.8665, "step": 35070 }, { "epoch": 3.06, "learning_rate": 3.467988470608787e-05, "loss": 0.9788, "step": 35080 }, { "epoch": 3.06, "learning_rate": 3.467551751244651e-05, "loss": 0.9071, "step": 35090 }, { "epoch": 3.07, "learning_rate": 3.467115031880514e-05, "loss": 0.8321, "step": 35100 }, { "epoch": 3.07, "learning_rate": 3.466678312516377e-05, "loss": 0.9209, "step": 35110 }, { "epoch": 3.07, "learning_rate": 3.4662415931522407e-05, "loss": 0.8042, "step": 35120 }, { "epoch": 3.07, "learning_rate": 3.465804873788104e-05, "loss": 0.9322, "step": 35130 }, { "epoch": 3.07, "learning_rate": 3.465368154423967e-05, "loss": 0.8444, "step": 35140 }, { "epoch": 3.07, "learning_rate": 3.4649314350598306e-05, "loss": 0.8347, "step": 35150 }, { "epoch": 3.07, "learning_rate": 3.464494715695694e-05, "loss": 0.8465, "step": 35160 }, { "epoch": 3.07, "learning_rate": 3.464057996331557e-05, "loss": 0.8566, "step": 35170 }, { "epoch": 3.07, "learning_rate": 3.4636212769674206e-05, "loss": 0.877, "step": 35180 }, { "epoch": 3.07, "learning_rate": 3.4631845576032846e-05, "loss": 0.9483, "step": 35190 }, { "epoch": 3.07, "learning_rate": 3.462747838239148e-05, "loss": 0.8888, "step": 35200 }, { "epoch": 3.08, "learning_rate": 3.462311118875011e-05, "loss": 1.0432, "step": 35210 }, { "epoch": 3.08, "learning_rate": 3.4618743995108745e-05, "loss": 0.873, "step": 35220 }, { "epoch": 3.08, "learning_rate": 3.461437680146738e-05, "loss": 1.0026, "step": 35230 }, { "epoch": 3.08, "learning_rate": 3.461000960782601e-05, "loss": 1.0515, "step": 35240 }, { "epoch": 3.08, "learning_rate": 3.4605642414184645e-05, "loss": 0.9022, "step": 35250 }, { "epoch": 3.08, "learning_rate": 3.4601275220543285e-05, "loss": 0.8203, "step": 35260 }, { "epoch": 3.08, "learning_rate": 3.459690802690191e-05, "loss": 0.8713, "step": 35270 }, { "epoch": 3.08, "learning_rate": 3.459254083326055e-05, "loss": 0.8287, "step": 35280 }, { "epoch": 3.08, "learning_rate": 3.4588173639619185e-05, "loss": 0.9076, "step": 35290 }, { "epoch": 3.08, "learning_rate": 3.458380644597782e-05, "loss": 0.8194, "step": 35300 }, { "epoch": 3.08, "learning_rate": 3.457943925233645e-05, "loss": 0.9893, "step": 35310 }, { "epoch": 3.08, "learning_rate": 3.4575072058695084e-05, "loss": 0.8751, "step": 35320 }, { "epoch": 3.09, "learning_rate": 3.457070486505372e-05, "loss": 0.9858, "step": 35330 }, { "epoch": 3.09, "learning_rate": 3.456633767141235e-05, "loss": 0.8483, "step": 35340 }, { "epoch": 3.09, "learning_rate": 3.456197047777099e-05, "loss": 0.7207, "step": 35350 }, { "epoch": 3.09, "learning_rate": 3.455760328412962e-05, "loss": 0.9631, "step": 35360 }, { "epoch": 3.09, "learning_rate": 3.455323609048826e-05, "loss": 0.798, "step": 35370 }, { "epoch": 3.09, "learning_rate": 3.454886889684688e-05, "loss": 0.7873, "step": 35380 }, { "epoch": 3.09, "learning_rate": 3.454450170320552e-05, "loss": 0.801, "step": 35390 }, { "epoch": 3.09, "learning_rate": 3.4540134509564157e-05, "loss": 1.0151, "step": 35400 }, { "epoch": 3.09, "learning_rate": 3.453576731592279e-05, "loss": 1.0433, "step": 35410 }, { "epoch": 3.09, "learning_rate": 3.453140012228142e-05, "loss": 0.813, "step": 35420 }, { "epoch": 3.09, "learning_rate": 3.4527032928640056e-05, "loss": 0.8712, "step": 35430 }, { "epoch": 3.1, "learning_rate": 3.4522665734998696e-05, "loss": 0.882, "step": 35440 }, { "epoch": 3.1, "learning_rate": 3.451829854135732e-05, "loss": 0.94, "step": 35450 }, { "epoch": 3.1, "learning_rate": 3.451393134771596e-05, "loss": 0.8956, "step": 35460 }, { "epoch": 3.1, "learning_rate": 3.450956415407459e-05, "loss": 0.841, "step": 35470 }, { "epoch": 3.1, "learning_rate": 3.450519696043323e-05, "loss": 1.0206, "step": 35480 }, { "epoch": 3.1, "learning_rate": 3.450082976679186e-05, "loss": 0.9371, "step": 35490 }, { "epoch": 3.1, "learning_rate": 3.4496462573150495e-05, "loss": 0.9155, "step": 35500 }, { "epoch": 3.1, "learning_rate": 3.449209537950913e-05, "loss": 0.9004, "step": 35510 }, { "epoch": 3.1, "learning_rate": 3.448772818586776e-05, "loss": 0.8114, "step": 35520 }, { "epoch": 3.1, "learning_rate": 3.44833609922264e-05, "loss": 0.7819, "step": 35530 }, { "epoch": 3.1, "learning_rate": 3.447899379858503e-05, "loss": 0.9102, "step": 35540 }, { "epoch": 3.11, "learning_rate": 3.447462660494367e-05, "loss": 0.8473, "step": 35550 }, { "epoch": 3.11, "learning_rate": 3.4470259411302295e-05, "loss": 0.8636, "step": 35560 }, { "epoch": 3.11, "learning_rate": 3.4465892217660935e-05, "loss": 0.8907, "step": 35570 }, { "epoch": 3.11, "learning_rate": 3.446152502401956e-05, "loss": 0.8656, "step": 35580 }, { "epoch": 3.11, "learning_rate": 3.44571578303782e-05, "loss": 0.8956, "step": 35590 }, { "epoch": 3.11, "learning_rate": 3.4452790636736834e-05, "loss": 0.9364, "step": 35600 }, { "epoch": 3.11, "learning_rate": 3.444842344309547e-05, "loss": 0.9816, "step": 35610 }, { "epoch": 3.11, "learning_rate": 3.444405624945411e-05, "loss": 0.8587, "step": 35620 }, { "epoch": 3.11, "learning_rate": 3.4439689055812734e-05, "loss": 0.9352, "step": 35630 }, { "epoch": 3.11, "learning_rate": 3.4435321862171374e-05, "loss": 0.7623, "step": 35640 }, { "epoch": 3.11, "learning_rate": 3.443095466853e-05, "loss": 0.9557, "step": 35650 }, { "epoch": 3.11, "learning_rate": 3.442658747488864e-05, "loss": 1.0131, "step": 35660 }, { "epoch": 3.12, "learning_rate": 3.4422220281247267e-05, "loss": 0.9015, "step": 35670 }, { "epoch": 3.12, "learning_rate": 3.4417853087605907e-05, "loss": 0.8346, "step": 35680 }, { "epoch": 3.12, "learning_rate": 3.441348589396454e-05, "loss": 0.9611, "step": 35690 }, { "epoch": 3.12, "learning_rate": 3.440911870032317e-05, "loss": 0.9278, "step": 35700 }, { "epoch": 3.12, "learning_rate": 3.4404751506681806e-05, "loss": 0.9588, "step": 35710 }, { "epoch": 3.12, "learning_rate": 3.440038431304044e-05, "loss": 0.9484, "step": 35720 }, { "epoch": 3.12, "learning_rate": 3.439601711939908e-05, "loss": 1.0451, "step": 35730 }, { "epoch": 3.12, "learning_rate": 3.4391649925757706e-05, "loss": 0.801, "step": 35740 }, { "epoch": 3.12, "learning_rate": 3.4387282732116346e-05, "loss": 0.9293, "step": 35750 }, { "epoch": 3.12, "learning_rate": 3.438291553847498e-05, "loss": 0.9039, "step": 35760 }, { "epoch": 3.12, "learning_rate": 3.437854834483361e-05, "loss": 0.8244, "step": 35770 }, { "epoch": 3.13, "learning_rate": 3.4374181151192245e-05, "loss": 0.8739, "step": 35780 }, { "epoch": 3.13, "learning_rate": 3.436981395755088e-05, "loss": 0.8162, "step": 35790 }, { "epoch": 3.13, "learning_rate": 3.436544676390951e-05, "loss": 0.9777, "step": 35800 }, { "epoch": 3.13, "learning_rate": 3.4361079570268145e-05, "loss": 0.839, "step": 35810 }, { "epoch": 3.13, "learning_rate": 3.4356712376626785e-05, "loss": 0.9514, "step": 35820 }, { "epoch": 3.13, "learning_rate": 3.435234518298541e-05, "loss": 0.9178, "step": 35830 }, { "epoch": 3.13, "learning_rate": 3.434797798934405e-05, "loss": 0.7832, "step": 35840 }, { "epoch": 3.13, "learning_rate": 3.4343610795702685e-05, "loss": 0.8735, "step": 35850 }, { "epoch": 3.13, "learning_rate": 3.433924360206132e-05, "loss": 0.8604, "step": 35860 }, { "epoch": 3.13, "learning_rate": 3.433487640841995e-05, "loss": 1.109, "step": 35870 }, { "epoch": 3.13, "learning_rate": 3.4330509214778584e-05, "loss": 0.8711, "step": 35880 }, { "epoch": 3.13, "learning_rate": 3.432614202113722e-05, "loss": 0.8646, "step": 35890 }, { "epoch": 3.14, "learning_rate": 3.432177482749585e-05, "loss": 0.8587, "step": 35900 }, { "epoch": 3.14, "learning_rate": 3.4317407633854484e-05, "loss": 0.8208, "step": 35910 }, { "epoch": 3.14, "learning_rate": 3.4313040440213124e-05, "loss": 0.9191, "step": 35920 }, { "epoch": 3.14, "learning_rate": 3.430867324657176e-05, "loss": 0.8553, "step": 35930 }, { "epoch": 3.14, "learning_rate": 3.430430605293039e-05, "loss": 0.8445, "step": 35940 }, { "epoch": 3.14, "learning_rate": 3.429993885928902e-05, "loss": 0.8122, "step": 35950 }, { "epoch": 3.14, "learning_rate": 3.4295571665647657e-05, "loss": 0.8396, "step": 35960 }, { "epoch": 3.14, "learning_rate": 3.429120447200629e-05, "loss": 0.8943, "step": 35970 }, { "epoch": 3.14, "learning_rate": 3.428683727836492e-05, "loss": 0.8213, "step": 35980 }, { "epoch": 3.14, "learning_rate": 3.4282470084723556e-05, "loss": 0.8706, "step": 35990 }, { "epoch": 3.14, "learning_rate": 3.427810289108219e-05, "loss": 0.8227, "step": 36000 }, { "epoch": 3.15, "learning_rate": 3.427373569744083e-05, "loss": 0.9031, "step": 36010 }, { "epoch": 3.15, "learning_rate": 3.426936850379946e-05, "loss": 0.8812, "step": 36020 }, { "epoch": 3.15, "learning_rate": 3.4265001310158096e-05, "loss": 1.0123, "step": 36030 }, { "epoch": 3.15, "learning_rate": 3.426063411651673e-05, "loss": 0.8268, "step": 36040 }, { "epoch": 3.15, "learning_rate": 3.425626692287536e-05, "loss": 0.9628, "step": 36050 }, { "epoch": 3.15, "learning_rate": 3.4251899729233995e-05, "loss": 0.8805, "step": 36060 }, { "epoch": 3.15, "learning_rate": 3.424753253559263e-05, "loss": 0.7904, "step": 36070 }, { "epoch": 3.15, "learning_rate": 3.424316534195126e-05, "loss": 1.0054, "step": 36080 }, { "epoch": 3.15, "learning_rate": 3.4238798148309895e-05, "loss": 0.889, "step": 36090 }, { "epoch": 3.15, "learning_rate": 3.4234430954668535e-05, "loss": 0.9051, "step": 36100 }, { "epoch": 3.15, "learning_rate": 3.423006376102716e-05, "loss": 0.9296, "step": 36110 }, { "epoch": 3.15, "learning_rate": 3.42256965673858e-05, "loss": 0.9426, "step": 36120 }, { "epoch": 3.16, "learning_rate": 3.4221329373744435e-05, "loss": 0.9493, "step": 36130 }, { "epoch": 3.16, "learning_rate": 3.421696218010307e-05, "loss": 0.916, "step": 36140 }, { "epoch": 3.16, "learning_rate": 3.42125949864617e-05, "loss": 0.8907, "step": 36150 }, { "epoch": 3.16, "learning_rate": 3.4208227792820334e-05, "loss": 0.8568, "step": 36160 }, { "epoch": 3.16, "learning_rate": 3.4203860599178974e-05, "loss": 0.9123, "step": 36170 }, { "epoch": 3.16, "learning_rate": 3.41994934055376e-05, "loss": 0.9181, "step": 36180 }, { "epoch": 3.16, "learning_rate": 3.419512621189624e-05, "loss": 0.8457, "step": 36190 }, { "epoch": 3.16, "learning_rate": 3.419075901825487e-05, "loss": 0.934, "step": 36200 }, { "epoch": 3.16, "learning_rate": 3.418639182461351e-05, "loss": 0.876, "step": 36210 }, { "epoch": 3.16, "learning_rate": 3.418202463097214e-05, "loss": 0.8449, "step": 36220 }, { "epoch": 3.16, "learning_rate": 3.417765743733077e-05, "loss": 0.9457, "step": 36230 }, { "epoch": 3.17, "learning_rate": 3.4173290243689407e-05, "loss": 0.8618, "step": 36240 }, { "epoch": 3.17, "learning_rate": 3.416892305004804e-05, "loss": 0.9077, "step": 36250 }, { "epoch": 3.17, "learning_rate": 3.416455585640668e-05, "loss": 0.942, "step": 36260 }, { "epoch": 3.17, "learning_rate": 3.4160188662765306e-05, "loss": 0.995, "step": 36270 }, { "epoch": 3.17, "learning_rate": 3.4155821469123946e-05, "loss": 0.7341, "step": 36280 }, { "epoch": 3.17, "learning_rate": 3.415145427548257e-05, "loss": 0.8964, "step": 36290 }, { "epoch": 3.17, "learning_rate": 3.414708708184121e-05, "loss": 0.9073, "step": 36300 }, { "epoch": 3.17, "learning_rate": 3.414271988819984e-05, "loss": 0.8333, "step": 36310 }, { "epoch": 3.17, "learning_rate": 3.413835269455848e-05, "loss": 0.8367, "step": 36320 }, { "epoch": 3.17, "learning_rate": 3.413398550091711e-05, "loss": 0.9353, "step": 36330 }, { "epoch": 3.17, "learning_rate": 3.4129618307275745e-05, "loss": 0.8865, "step": 36340 }, { "epoch": 3.17, "learning_rate": 3.4125251113634385e-05, "loss": 0.7973, "step": 36350 }, { "epoch": 3.18, "learning_rate": 3.412088391999301e-05, "loss": 0.9568, "step": 36360 }, { "epoch": 3.18, "learning_rate": 3.411651672635165e-05, "loss": 0.9927, "step": 36370 }, { "epoch": 3.18, "learning_rate": 3.411214953271028e-05, "loss": 0.9476, "step": 36380 }, { "epoch": 3.18, "learning_rate": 3.410778233906892e-05, "loss": 0.8401, "step": 36390 }, { "epoch": 3.18, "learning_rate": 3.4103415145427545e-05, "loss": 0.7536, "step": 36400 }, { "epoch": 3.18, "learning_rate": 3.4099047951786185e-05, "loss": 0.8481, "step": 36410 }, { "epoch": 3.18, "learning_rate": 3.409468075814482e-05, "loss": 1.0072, "step": 36420 }, { "epoch": 3.18, "learning_rate": 3.409031356450345e-05, "loss": 0.978, "step": 36430 }, { "epoch": 3.18, "learning_rate": 3.4085946370862084e-05, "loss": 0.7873, "step": 36440 }, { "epoch": 3.18, "learning_rate": 3.408157917722072e-05, "loss": 0.8129, "step": 36450 }, { "epoch": 3.18, "learning_rate": 3.407721198357936e-05, "loss": 0.8795, "step": 36460 }, { "epoch": 3.19, "learning_rate": 3.4072844789937984e-05, "loss": 0.8698, "step": 36470 }, { "epoch": 3.19, "learning_rate": 3.4068477596296624e-05, "loss": 0.9103, "step": 36480 }, { "epoch": 3.19, "learning_rate": 3.406411040265525e-05, "loss": 1.0156, "step": 36490 }, { "epoch": 3.19, "learning_rate": 3.405974320901389e-05, "loss": 0.9182, "step": 36500 }, { "epoch": 3.19, "learning_rate": 3.4055376015372523e-05, "loss": 1.0087, "step": 36510 }, { "epoch": 3.19, "learning_rate": 3.4051008821731157e-05, "loss": 0.8094, "step": 36520 }, { "epoch": 3.19, "learning_rate": 3.404664162808979e-05, "loss": 0.9239, "step": 36530 }, { "epoch": 3.19, "learning_rate": 3.404227443444842e-05, "loss": 0.862, "step": 36540 }, { "epoch": 3.19, "learning_rate": 3.403790724080706e-05, "loss": 0.8401, "step": 36550 }, { "epoch": 3.19, "learning_rate": 3.403354004716569e-05, "loss": 0.8969, "step": 36560 }, { "epoch": 3.19, "learning_rate": 3.402917285352433e-05, "loss": 0.9551, "step": 36570 }, { "epoch": 3.2, "learning_rate": 3.402480565988296e-05, "loss": 0.8493, "step": 36580 }, { "epoch": 3.2, "learning_rate": 3.4020438466241596e-05, "loss": 0.8277, "step": 36590 }, { "epoch": 3.2, "learning_rate": 3.401607127260023e-05, "loss": 0.8707, "step": 36600 }, { "epoch": 3.2, "learning_rate": 3.401170407895886e-05, "loss": 1.0136, "step": 36610 }, { "epoch": 3.2, "learning_rate": 3.4007336885317495e-05, "loss": 0.9579, "step": 36620 }, { "epoch": 3.2, "learning_rate": 3.400296969167613e-05, "loss": 0.867, "step": 36630 }, { "epoch": 3.2, "learning_rate": 3.399860249803476e-05, "loss": 0.8497, "step": 36640 }, { "epoch": 3.2, "learning_rate": 3.3994235304393395e-05, "loss": 1.0245, "step": 36650 }, { "epoch": 3.2, "learning_rate": 3.3989868110752035e-05, "loss": 0.9093, "step": 36660 }, { "epoch": 3.2, "learning_rate": 3.398550091711067e-05, "loss": 0.8243, "step": 36670 }, { "epoch": 3.2, "learning_rate": 3.39811337234693e-05, "loss": 0.8868, "step": 36680 }, { "epoch": 3.2, "learning_rate": 3.3976766529827935e-05, "loss": 0.8731, "step": 36690 }, { "epoch": 3.21, "learning_rate": 3.397239933618657e-05, "loss": 0.9336, "step": 36700 }, { "epoch": 3.21, "learning_rate": 3.39680321425452e-05, "loss": 0.8461, "step": 36710 }, { "epoch": 3.21, "learning_rate": 3.3963664948903834e-05, "loss": 0.9806, "step": 36720 }, { "epoch": 3.21, "learning_rate": 3.395929775526247e-05, "loss": 0.9746, "step": 36730 }, { "epoch": 3.21, "learning_rate": 3.395493056162111e-05, "loss": 0.9219, "step": 36740 }, { "epoch": 3.21, "learning_rate": 3.395056336797974e-05, "loss": 0.8256, "step": 36750 }, { "epoch": 3.21, "learning_rate": 3.3946196174338374e-05, "loss": 0.8811, "step": 36760 }, { "epoch": 3.21, "learning_rate": 3.394182898069701e-05, "loss": 0.9958, "step": 36770 }, { "epoch": 3.21, "learning_rate": 3.393746178705564e-05, "loss": 1.0068, "step": 36780 }, { "epoch": 3.21, "learning_rate": 3.3933094593414273e-05, "loss": 0.784, "step": 36790 }, { "epoch": 3.21, "learning_rate": 3.392872739977291e-05, "loss": 0.9068, "step": 36800 }, { "epoch": 3.22, "learning_rate": 3.392436020613154e-05, "loss": 0.8763, "step": 36810 }, { "epoch": 3.22, "learning_rate": 3.391999301249017e-05, "loss": 0.9686, "step": 36820 }, { "epoch": 3.22, "learning_rate": 3.391562581884881e-05, "loss": 0.9457, "step": 36830 }, { "epoch": 3.22, "learning_rate": 3.391125862520744e-05, "loss": 0.8564, "step": 36840 }, { "epoch": 3.22, "learning_rate": 3.390689143156608e-05, "loss": 0.99, "step": 36850 }, { "epoch": 3.22, "learning_rate": 3.390252423792471e-05, "loss": 0.9033, "step": 36860 }, { "epoch": 3.22, "learning_rate": 3.3898157044283346e-05, "loss": 0.9307, "step": 36870 }, { "epoch": 3.22, "learning_rate": 3.389378985064198e-05, "loss": 1.0438, "step": 36880 }, { "epoch": 3.22, "learning_rate": 3.388942265700061e-05, "loss": 0.7963, "step": 36890 }, { "epoch": 3.22, "learning_rate": 3.3885055463359245e-05, "loss": 0.8291, "step": 36900 }, { "epoch": 3.22, "learning_rate": 3.388068826971788e-05, "loss": 0.8747, "step": 36910 }, { "epoch": 3.22, "learning_rate": 3.387632107607652e-05, "loss": 0.9417, "step": 36920 }, { "epoch": 3.23, "learning_rate": 3.3871953882435145e-05, "loss": 0.8939, "step": 36930 }, { "epoch": 3.23, "learning_rate": 3.3867586688793785e-05, "loss": 0.8849, "step": 36940 }, { "epoch": 3.23, "learning_rate": 3.386321949515242e-05, "loss": 0.8524, "step": 36950 }, { "epoch": 3.23, "learning_rate": 3.385885230151105e-05, "loss": 0.6721, "step": 36960 }, { "epoch": 3.23, "learning_rate": 3.3854485107869685e-05, "loss": 0.8793, "step": 36970 }, { "epoch": 3.23, "learning_rate": 3.385011791422832e-05, "loss": 0.8334, "step": 36980 }, { "epoch": 3.23, "learning_rate": 3.384575072058696e-05, "loss": 0.9656, "step": 36990 }, { "epoch": 3.23, "learning_rate": 3.3841383526945584e-05, "loss": 0.8265, "step": 37000 }, { "epoch": 3.23, "learning_rate": 3.3837016333304224e-05, "loss": 0.8985, "step": 37010 }, { "epoch": 3.23, "learning_rate": 3.383264913966285e-05, "loss": 0.9337, "step": 37020 }, { "epoch": 3.23, "learning_rate": 3.382828194602149e-05, "loss": 0.9311, "step": 37030 }, { "epoch": 3.24, "learning_rate": 3.382391475238012e-05, "loss": 0.8259, "step": 37040 }, { "epoch": 3.24, "learning_rate": 3.381954755873876e-05, "loss": 0.9188, "step": 37050 }, { "epoch": 3.24, "learning_rate": 3.381518036509739e-05, "loss": 0.8173, "step": 37060 }, { "epoch": 3.24, "learning_rate": 3.3810813171456023e-05, "loss": 0.978, "step": 37070 }, { "epoch": 3.24, "learning_rate": 3.3806445977814663e-05, "loss": 0.8295, "step": 37080 }, { "epoch": 3.24, "learning_rate": 3.380207878417329e-05, "loss": 0.7591, "step": 37090 }, { "epoch": 3.24, "learning_rate": 3.379771159053193e-05, "loss": 0.9092, "step": 37100 }, { "epoch": 3.24, "learning_rate": 3.3793344396890556e-05, "loss": 0.7545, "step": 37110 }, { "epoch": 3.24, "learning_rate": 3.3788977203249196e-05, "loss": 0.9288, "step": 37120 }, { "epoch": 3.24, "learning_rate": 3.378461000960782e-05, "loss": 0.9138, "step": 37130 }, { "epoch": 3.24, "learning_rate": 3.378024281596646e-05, "loss": 0.8662, "step": 37140 }, { "epoch": 3.24, "learning_rate": 3.3775875622325096e-05, "loss": 0.7862, "step": 37150 }, { "epoch": 3.25, "learning_rate": 3.377150842868373e-05, "loss": 0.8301, "step": 37160 }, { "epoch": 3.25, "learning_rate": 3.376714123504236e-05, "loss": 0.9223, "step": 37170 }, { "epoch": 3.25, "learning_rate": 3.3762774041400995e-05, "loss": 0.9533, "step": 37180 }, { "epoch": 3.25, "learning_rate": 3.3758406847759635e-05, "loss": 0.8741, "step": 37190 }, { "epoch": 3.25, "learning_rate": 3.375403965411826e-05, "loss": 0.957, "step": 37200 }, { "epoch": 3.25, "learning_rate": 3.37496724604769e-05, "loss": 0.8571, "step": 37210 }, { "epoch": 3.25, "learning_rate": 3.374530526683553e-05, "loss": 0.8671, "step": 37220 }, { "epoch": 3.25, "learning_rate": 3.374093807319417e-05, "loss": 0.8366, "step": 37230 }, { "epoch": 3.25, "learning_rate": 3.37365708795528e-05, "loss": 0.8003, "step": 37240 }, { "epoch": 3.25, "learning_rate": 3.3732203685911435e-05, "loss": 0.9867, "step": 37250 }, { "epoch": 3.25, "learning_rate": 3.372783649227007e-05, "loss": 1.0672, "step": 37260 }, { "epoch": 3.26, "learning_rate": 3.37234692986287e-05, "loss": 0.9091, "step": 37270 }, { "epoch": 3.26, "learning_rate": 3.371910210498734e-05, "loss": 0.8926, "step": 37280 }, { "epoch": 3.26, "learning_rate": 3.371473491134597e-05, "loss": 0.8997, "step": 37290 }, { "epoch": 3.26, "learning_rate": 3.371036771770461e-05, "loss": 0.8784, "step": 37300 }, { "epoch": 3.26, "learning_rate": 3.3706000524063234e-05, "loss": 0.9578, "step": 37310 }, { "epoch": 3.26, "learning_rate": 3.3701633330421874e-05, "loss": 1.0252, "step": 37320 }, { "epoch": 3.26, "learning_rate": 3.369726613678051e-05, "loss": 0.9283, "step": 37330 }, { "epoch": 3.26, "learning_rate": 3.369289894313914e-05, "loss": 0.8785, "step": 37340 }, { "epoch": 3.26, "learning_rate": 3.3688531749497773e-05, "loss": 0.9622, "step": 37350 }, { "epoch": 3.26, "learning_rate": 3.368416455585641e-05, "loss": 0.8911, "step": 37360 }, { "epoch": 3.26, "learning_rate": 3.367979736221504e-05, "loss": 0.9444, "step": 37370 }, { "epoch": 3.26, "learning_rate": 3.367543016857367e-05, "loss": 0.9206, "step": 37380 }, { "epoch": 3.27, "learning_rate": 3.367106297493231e-05, "loss": 0.8737, "step": 37390 }, { "epoch": 3.27, "learning_rate": 3.3666695781290946e-05, "loss": 0.9084, "step": 37400 }, { "epoch": 3.27, "learning_rate": 3.366232858764958e-05, "loss": 0.8716, "step": 37410 }, { "epoch": 3.27, "learning_rate": 3.365796139400821e-05, "loss": 0.8926, "step": 37420 }, { "epoch": 3.27, "learning_rate": 3.3653594200366846e-05, "loss": 0.991, "step": 37430 }, { "epoch": 3.27, "learning_rate": 3.364922700672548e-05, "loss": 0.981, "step": 37440 }, { "epoch": 3.27, "learning_rate": 3.364485981308411e-05, "loss": 0.8926, "step": 37450 }, { "epoch": 3.27, "learning_rate": 3.3640492619442745e-05, "loss": 0.8259, "step": 37460 }, { "epoch": 3.27, "learning_rate": 3.363612542580138e-05, "loss": 0.8469, "step": 37470 }, { "epoch": 3.27, "learning_rate": 3.363175823216002e-05, "loss": 0.9116, "step": 37480 }, { "epoch": 3.27, "learning_rate": 3.362739103851865e-05, "loss": 0.843, "step": 37490 }, { "epoch": 3.28, "learning_rate": 3.3623023844877285e-05, "loss": 0.9118, "step": 37500 }, { "epoch": 3.28, "learning_rate": 3.361865665123592e-05, "loss": 0.9026, "step": 37510 }, { "epoch": 3.28, "learning_rate": 3.361428945759455e-05, "loss": 0.8457, "step": 37520 }, { "epoch": 3.28, "learning_rate": 3.3609922263953185e-05, "loss": 0.8222, "step": 37530 }, { "epoch": 3.28, "learning_rate": 3.360555507031182e-05, "loss": 0.8583, "step": 37540 }, { "epoch": 3.28, "learning_rate": 3.360118787667045e-05, "loss": 0.7543, "step": 37550 }, { "epoch": 3.28, "learning_rate": 3.3596820683029084e-05, "loss": 0.8891, "step": 37560 }, { "epoch": 3.28, "learning_rate": 3.359245348938772e-05, "loss": 0.8562, "step": 37570 }, { "epoch": 3.28, "learning_rate": 3.358808629574636e-05, "loss": 0.875, "step": 37580 }, { "epoch": 3.28, "learning_rate": 3.358371910210499e-05, "loss": 1.0304, "step": 37590 }, { "epoch": 3.28, "learning_rate": 3.3579351908463624e-05, "loss": 0.824, "step": 37600 }, { "epoch": 3.29, "learning_rate": 3.357498471482226e-05, "loss": 0.9239, "step": 37610 }, { "epoch": 3.29, "learning_rate": 3.357061752118089e-05, "loss": 0.8727, "step": 37620 }, { "epoch": 3.29, "learning_rate": 3.3566250327539523e-05, "loss": 0.9683, "step": 37630 }, { "epoch": 3.29, "learning_rate": 3.356188313389816e-05, "loss": 0.8903, "step": 37640 }, { "epoch": 3.29, "learning_rate": 3.35575159402568e-05, "loss": 0.7583, "step": 37650 }, { "epoch": 3.29, "learning_rate": 3.355314874661542e-05, "loss": 0.8851, "step": 37660 }, { "epoch": 3.29, "learning_rate": 3.354878155297406e-05, "loss": 1.0015, "step": 37670 }, { "epoch": 3.29, "learning_rate": 3.3544414359332696e-05, "loss": 0.8163, "step": 37680 }, { "epoch": 3.29, "learning_rate": 3.354004716569133e-05, "loss": 0.8691, "step": 37690 }, { "epoch": 3.29, "learning_rate": 3.353567997204996e-05, "loss": 0.9769, "step": 37700 }, { "epoch": 3.29, "learning_rate": 3.3531312778408596e-05, "loss": 0.8945, "step": 37710 }, { "epoch": 3.29, "learning_rate": 3.352694558476723e-05, "loss": 0.8299, "step": 37720 }, { "epoch": 3.3, "learning_rate": 3.352257839112586e-05, "loss": 0.8719, "step": 37730 }, { "epoch": 3.3, "learning_rate": 3.35182111974845e-05, "loss": 0.8188, "step": 37740 }, { "epoch": 3.3, "learning_rate": 3.351384400384313e-05, "loss": 0.7876, "step": 37750 }, { "epoch": 3.3, "learning_rate": 3.350947681020177e-05, "loss": 0.9175, "step": 37760 }, { "epoch": 3.3, "learning_rate": 3.3505109616560395e-05, "loss": 0.7864, "step": 37770 }, { "epoch": 3.3, "learning_rate": 3.3500742422919035e-05, "loss": 0.9697, "step": 37780 }, { "epoch": 3.3, "learning_rate": 3.349637522927767e-05, "loss": 0.9064, "step": 37790 }, { "epoch": 3.3, "learning_rate": 3.34920080356363e-05, "loss": 0.9331, "step": 37800 }, { "epoch": 3.3, "learning_rate": 3.348764084199494e-05, "loss": 0.9566, "step": 37810 }, { "epoch": 3.3, "learning_rate": 3.348327364835357e-05, "loss": 0.8627, "step": 37820 }, { "epoch": 3.3, "learning_rate": 3.347890645471221e-05, "loss": 0.8814, "step": 37830 }, { "epoch": 3.31, "learning_rate": 3.3474539261070834e-05, "loss": 0.8672, "step": 37840 }, { "epoch": 3.31, "learning_rate": 3.3470172067429474e-05, "loss": 0.9348, "step": 37850 }, { "epoch": 3.31, "learning_rate": 3.34658048737881e-05, "loss": 0.914, "step": 37860 }, { "epoch": 3.31, "learning_rate": 3.346143768014674e-05, "loss": 0.8979, "step": 37870 }, { "epoch": 3.31, "learning_rate": 3.3457070486505374e-05, "loss": 0.9306, "step": 37880 }, { "epoch": 3.31, "learning_rate": 3.345270329286401e-05, "loss": 0.9296, "step": 37890 }, { "epoch": 3.31, "learning_rate": 3.344833609922264e-05, "loss": 0.8789, "step": 37900 }, { "epoch": 3.31, "learning_rate": 3.3443968905581273e-05, "loss": 0.7948, "step": 37910 }, { "epoch": 3.31, "learning_rate": 3.3439601711939913e-05, "loss": 0.7723, "step": 37920 }, { "epoch": 3.31, "learning_rate": 3.343523451829854e-05, "loss": 0.9166, "step": 37930 }, { "epoch": 3.31, "learning_rate": 3.343086732465718e-05, "loss": 0.7961, "step": 37940 }, { "epoch": 3.31, "learning_rate": 3.3426500131015806e-05, "loss": 0.8378, "step": 37950 }, { "epoch": 3.32, "learning_rate": 3.3422132937374446e-05, "loss": 0.9409, "step": 37960 }, { "epoch": 3.32, "learning_rate": 3.341776574373307e-05, "loss": 1.0128, "step": 37970 }, { "epoch": 3.32, "learning_rate": 3.341339855009171e-05, "loss": 0.9386, "step": 37980 }, { "epoch": 3.32, "learning_rate": 3.3409031356450346e-05, "loss": 0.8847, "step": 37990 }, { "epoch": 3.32, "learning_rate": 3.340466416280898e-05, "loss": 0.8472, "step": 38000 }, { "epoch": 3.32, "learning_rate": 3.340029696916762e-05, "loss": 0.8086, "step": 38010 }, { "epoch": 3.32, "learning_rate": 3.3395929775526245e-05, "loss": 0.8825, "step": 38020 }, { "epoch": 3.32, "learning_rate": 3.3391562581884885e-05, "loss": 0.8778, "step": 38030 }, { "epoch": 3.32, "learning_rate": 3.338719538824351e-05, "loss": 1.017, "step": 38040 }, { "epoch": 3.32, "learning_rate": 3.338282819460215e-05, "loss": 0.768, "step": 38050 }, { "epoch": 3.32, "learning_rate": 3.3378461000960785e-05, "loss": 1.0126, "step": 38060 }, { "epoch": 3.33, "learning_rate": 3.337409380731942e-05, "loss": 0.7972, "step": 38070 }, { "epoch": 3.33, "learning_rate": 3.336972661367805e-05, "loss": 0.8973, "step": 38080 }, { "epoch": 3.33, "learning_rate": 3.3365359420036685e-05, "loss": 0.9751, "step": 38090 }, { "epoch": 3.33, "learning_rate": 3.336099222639532e-05, "loss": 0.8613, "step": 38100 }, { "epoch": 3.33, "learning_rate": 3.335662503275395e-05, "loss": 0.8612, "step": 38110 }, { "epoch": 3.33, "learning_rate": 3.335225783911259e-05, "loss": 0.8904, "step": 38120 }, { "epoch": 3.33, "learning_rate": 3.334789064547122e-05, "loss": 0.8445, "step": 38130 }, { "epoch": 3.33, "learning_rate": 3.334352345182986e-05, "loss": 0.9083, "step": 38140 }, { "epoch": 3.33, "learning_rate": 3.333915625818849e-05, "loss": 0.9217, "step": 38150 }, { "epoch": 3.33, "learning_rate": 3.3334789064547124e-05, "loss": 0.9023, "step": 38160 }, { "epoch": 3.33, "learning_rate": 3.333042187090576e-05, "loss": 0.9067, "step": 38170 }, { "epoch": 3.33, "learning_rate": 3.332605467726439e-05, "loss": 0.8068, "step": 38180 }, { "epoch": 3.34, "learning_rate": 3.3321687483623023e-05, "loss": 0.805, "step": 38190 }, { "epoch": 3.34, "learning_rate": 3.331732028998166e-05, "loss": 0.8529, "step": 38200 }, { "epoch": 3.34, "learning_rate": 3.33129530963403e-05, "loss": 0.957, "step": 38210 }, { "epoch": 3.34, "learning_rate": 3.330858590269893e-05, "loss": 0.7811, "step": 38220 }, { "epoch": 3.34, "learning_rate": 3.330421870905756e-05, "loss": 0.9307, "step": 38230 }, { "epoch": 3.34, "learning_rate": 3.3299851515416196e-05, "loss": 0.8822, "step": 38240 }, { "epoch": 3.34, "learning_rate": 3.329548432177483e-05, "loss": 0.8742, "step": 38250 }, { "epoch": 3.34, "learning_rate": 3.329111712813346e-05, "loss": 0.9279, "step": 38260 }, { "epoch": 3.34, "learning_rate": 3.3286749934492096e-05, "loss": 0.8408, "step": 38270 }, { "epoch": 3.34, "learning_rate": 3.328238274085073e-05, "loss": 0.9177, "step": 38280 }, { "epoch": 3.34, "learning_rate": 3.327801554720936e-05, "loss": 0.8692, "step": 38290 }, { "epoch": 3.35, "learning_rate": 3.3273648353567995e-05, "loss": 0.8729, "step": 38300 }, { "epoch": 3.35, "learning_rate": 3.3269281159926635e-05, "loss": 1.0512, "step": 38310 }, { "epoch": 3.35, "learning_rate": 3.326491396628527e-05, "loss": 0.7673, "step": 38320 }, { "epoch": 3.35, "learning_rate": 3.32605467726439e-05, "loss": 0.8329, "step": 38330 }, { "epoch": 3.35, "learning_rate": 3.3256179579002535e-05, "loss": 0.9095, "step": 38340 }, { "epoch": 3.35, "learning_rate": 3.325181238536117e-05, "loss": 0.8141, "step": 38350 }, { "epoch": 3.35, "learning_rate": 3.32474451917198e-05, "loss": 0.8884, "step": 38360 }, { "epoch": 3.35, "learning_rate": 3.3243077998078435e-05, "loss": 0.9357, "step": 38370 }, { "epoch": 3.35, "learning_rate": 3.323871080443707e-05, "loss": 0.8722, "step": 38380 }, { "epoch": 3.35, "learning_rate": 3.32343436107957e-05, "loss": 1.0003, "step": 38390 }, { "epoch": 3.35, "learning_rate": 3.322997641715434e-05, "loss": 0.7806, "step": 38400 }, { "epoch": 3.35, "learning_rate": 3.3225609223512974e-05, "loss": 0.7441, "step": 38410 }, { "epoch": 3.36, "learning_rate": 3.322124202987161e-05, "loss": 0.8462, "step": 38420 }, { "epoch": 3.36, "learning_rate": 3.321687483623024e-05, "loss": 0.9057, "step": 38430 }, { "epoch": 3.36, "learning_rate": 3.3212507642588874e-05, "loss": 0.7867, "step": 38440 }, { "epoch": 3.36, "learning_rate": 3.320814044894751e-05, "loss": 0.9724, "step": 38450 }, { "epoch": 3.36, "learning_rate": 3.320377325530614e-05, "loss": 0.9636, "step": 38460 }, { "epoch": 3.36, "learning_rate": 3.319940606166478e-05, "loss": 0.9747, "step": 38470 }, { "epoch": 3.36, "learning_rate": 3.319503886802341e-05, "loss": 0.9546, "step": 38480 }, { "epoch": 3.36, "learning_rate": 3.319067167438205e-05, "loss": 0.7638, "step": 38490 }, { "epoch": 3.36, "learning_rate": 3.318630448074067e-05, "loss": 0.8517, "step": 38500 }, { "epoch": 3.36, "learning_rate": 3.318193728709931e-05, "loss": 0.9352, "step": 38510 }, { "epoch": 3.36, "learning_rate": 3.3177570093457946e-05, "loss": 0.7845, "step": 38520 }, { "epoch": 3.37, "learning_rate": 3.317320289981658e-05, "loss": 0.8812, "step": 38530 }, { "epoch": 3.37, "learning_rate": 3.316883570617521e-05, "loss": 0.8305, "step": 38540 }, { "epoch": 3.37, "learning_rate": 3.3164468512533846e-05, "loss": 0.8574, "step": 38550 }, { "epoch": 3.37, "learning_rate": 3.3160101318892486e-05, "loss": 0.9807, "step": 38560 }, { "epoch": 3.37, "learning_rate": 3.315573412525111e-05, "loss": 0.9211, "step": 38570 }, { "epoch": 3.37, "learning_rate": 3.315136693160975e-05, "loss": 0.8707, "step": 38580 }, { "epoch": 3.37, "learning_rate": 3.314699973796838e-05, "loss": 0.9616, "step": 38590 }, { "epoch": 3.37, "learning_rate": 3.314263254432702e-05, "loss": 0.9155, "step": 38600 }, { "epoch": 3.37, "learning_rate": 3.313826535068565e-05, "loss": 0.8248, "step": 38610 }, { "epoch": 3.37, "learning_rate": 3.3133898157044285e-05, "loss": 0.9311, "step": 38620 }, { "epoch": 3.37, "learning_rate": 3.312953096340292e-05, "loss": 0.9414, "step": 38630 }, { "epoch": 3.37, "learning_rate": 3.312516376976155e-05, "loss": 0.8742, "step": 38640 }, { "epoch": 3.38, "learning_rate": 3.312079657612019e-05, "loss": 0.8834, "step": 38650 }, { "epoch": 3.38, "learning_rate": 3.311642938247882e-05, "loss": 0.9586, "step": 38660 }, { "epoch": 3.38, "learning_rate": 3.311206218883746e-05, "loss": 0.9079, "step": 38670 }, { "epoch": 3.38, "learning_rate": 3.3107694995196084e-05, "loss": 0.8603, "step": 38680 }, { "epoch": 3.38, "learning_rate": 3.3103327801554724e-05, "loss": 0.9403, "step": 38690 }, { "epoch": 3.38, "learning_rate": 3.309896060791335e-05, "loss": 0.8825, "step": 38700 }, { "epoch": 3.38, "learning_rate": 3.309459341427199e-05, "loss": 0.8225, "step": 38710 }, { "epoch": 3.38, "learning_rate": 3.3090226220630624e-05, "loss": 0.9175, "step": 38720 }, { "epoch": 3.38, "learning_rate": 3.308585902698926e-05, "loss": 0.8398, "step": 38730 }, { "epoch": 3.38, "learning_rate": 3.30814918333479e-05, "loss": 0.9789, "step": 38740 }, { "epoch": 3.38, "learning_rate": 3.3077124639706523e-05, "loss": 0.9611, "step": 38750 }, { "epoch": 3.39, "learning_rate": 3.3072757446065163e-05, "loss": 0.8102, "step": 38760 }, { "epoch": 3.39, "learning_rate": 3.306839025242379e-05, "loss": 0.7973, "step": 38770 }, { "epoch": 3.39, "learning_rate": 3.306402305878243e-05, "loss": 0.9467, "step": 38780 }, { "epoch": 3.39, "learning_rate": 3.3059655865141056e-05, "loss": 0.8624, "step": 38790 }, { "epoch": 3.39, "learning_rate": 3.3055288671499696e-05, "loss": 0.8698, "step": 38800 }, { "epoch": 3.39, "learning_rate": 3.305092147785833e-05, "loss": 0.889, "step": 38810 }, { "epoch": 3.39, "learning_rate": 3.304655428421696e-05, "loss": 0.8917, "step": 38820 }, { "epoch": 3.39, "learning_rate": 3.3042187090575596e-05, "loss": 0.9046, "step": 38830 }, { "epoch": 3.39, "learning_rate": 3.303781989693423e-05, "loss": 0.8106, "step": 38840 }, { "epoch": 3.39, "learning_rate": 3.303345270329287e-05, "loss": 1.0245, "step": 38850 }, { "epoch": 3.39, "learning_rate": 3.3029085509651496e-05, "loss": 0.8319, "step": 38860 }, { "epoch": 3.4, "learning_rate": 3.3024718316010135e-05, "loss": 0.994, "step": 38870 }, { "epoch": 3.4, "learning_rate": 3.302035112236877e-05, "loss": 0.8914, "step": 38880 }, { "epoch": 3.4, "learning_rate": 3.30159839287274e-05, "loss": 0.9563, "step": 38890 }, { "epoch": 3.4, "learning_rate": 3.3011616735086035e-05, "loss": 0.8352, "step": 38900 }, { "epoch": 3.4, "learning_rate": 3.300724954144467e-05, "loss": 0.8798, "step": 38910 }, { "epoch": 3.4, "learning_rate": 3.30028823478033e-05, "loss": 0.8841, "step": 38920 }, { "epoch": 3.4, "learning_rate": 3.2998515154161935e-05, "loss": 0.9059, "step": 38930 }, { "epoch": 3.4, "learning_rate": 3.2994147960520575e-05, "loss": 0.8982, "step": 38940 }, { "epoch": 3.4, "learning_rate": 3.29897807668792e-05, "loss": 0.8473, "step": 38950 }, { "epoch": 3.4, "learning_rate": 3.298541357323784e-05, "loss": 0.8248, "step": 38960 }, { "epoch": 3.4, "learning_rate": 3.2981046379596474e-05, "loss": 0.925, "step": 38970 }, { "epoch": 3.4, "learning_rate": 3.297667918595511e-05, "loss": 0.7976, "step": 38980 }, { "epoch": 3.41, "learning_rate": 3.297231199231374e-05, "loss": 0.8149, "step": 38990 }, { "epoch": 3.41, "learning_rate": 3.2967944798672374e-05, "loss": 0.9039, "step": 39000 }, { "epoch": 3.41, "learning_rate": 3.296357760503101e-05, "loss": 0.8951, "step": 39010 }, { "epoch": 3.41, "learning_rate": 3.295921041138964e-05, "loss": 0.8596, "step": 39020 }, { "epoch": 3.41, "learning_rate": 3.2954843217748274e-05, "loss": 0.9184, "step": 39030 }, { "epoch": 3.41, "learning_rate": 3.295047602410691e-05, "loss": 0.7691, "step": 39040 }, { "epoch": 3.41, "learning_rate": 3.294610883046555e-05, "loss": 0.9152, "step": 39050 }, { "epoch": 3.41, "learning_rate": 3.294174163682418e-05, "loss": 0.9746, "step": 39060 }, { "epoch": 3.41, "learning_rate": 3.293737444318281e-05, "loss": 0.8897, "step": 39070 }, { "epoch": 3.41, "learning_rate": 3.2933007249541446e-05, "loss": 0.8328, "step": 39080 }, { "epoch": 3.41, "learning_rate": 3.292864005590008e-05, "loss": 0.9095, "step": 39090 }, { "epoch": 3.42, "learning_rate": 3.292427286225871e-05, "loss": 0.9215, "step": 39100 }, { "epoch": 3.42, "learning_rate": 3.2919905668617346e-05, "loss": 0.8609, "step": 39110 }, { "epoch": 3.42, "learning_rate": 3.291553847497598e-05, "loss": 0.912, "step": 39120 }, { "epoch": 3.42, "learning_rate": 3.291117128133462e-05, "loss": 0.937, "step": 39130 }, { "epoch": 3.42, "learning_rate": 3.290680408769325e-05, "loss": 0.9144, "step": 39140 }, { "epoch": 3.42, "learning_rate": 3.2902436894051885e-05, "loss": 0.9416, "step": 39150 }, { "epoch": 3.42, "learning_rate": 3.289806970041052e-05, "loss": 0.9167, "step": 39160 }, { "epoch": 3.42, "learning_rate": 3.289370250676915e-05, "loss": 0.9428, "step": 39170 }, { "epoch": 3.42, "learning_rate": 3.2889335313127785e-05, "loss": 0.8021, "step": 39180 }, { "epoch": 3.42, "learning_rate": 3.288496811948642e-05, "loss": 0.8767, "step": 39190 }, { "epoch": 3.42, "learning_rate": 3.288060092584505e-05, "loss": 0.9318, "step": 39200 }, { "epoch": 3.42, "learning_rate": 3.2876233732203685e-05, "loss": 0.9166, "step": 39210 }, { "epoch": 3.43, "learning_rate": 3.2871866538562325e-05, "loss": 0.7278, "step": 39220 }, { "epoch": 3.43, "learning_rate": 3.286749934492095e-05, "loss": 0.9183, "step": 39230 }, { "epoch": 3.43, "learning_rate": 3.286313215127959e-05, "loss": 0.8492, "step": 39240 }, { "epoch": 3.43, "learning_rate": 3.2858764957638224e-05, "loss": 0.8909, "step": 39250 }, { "epoch": 3.43, "learning_rate": 3.285439776399686e-05, "loss": 0.684, "step": 39260 }, { "epoch": 3.43, "learning_rate": 3.285003057035549e-05, "loss": 0.9357, "step": 39270 }, { "epoch": 3.43, "learning_rate": 3.2845663376714124e-05, "loss": 0.8707, "step": 39280 }, { "epoch": 3.43, "learning_rate": 3.2841296183072764e-05, "loss": 0.7633, "step": 39290 }, { "epoch": 3.43, "learning_rate": 3.283692898943139e-05, "loss": 0.8987, "step": 39300 }, { "epoch": 3.43, "learning_rate": 3.283256179579003e-05, "loss": 0.9131, "step": 39310 }, { "epoch": 3.43, "learning_rate": 3.282819460214866e-05, "loss": 0.8101, "step": 39320 }, { "epoch": 3.44, "learning_rate": 3.28238274085073e-05, "loss": 0.7963, "step": 39330 }, { "epoch": 3.44, "learning_rate": 3.281946021486593e-05, "loss": 0.766, "step": 39340 }, { "epoch": 3.44, "learning_rate": 3.281509302122456e-05, "loss": 0.7346, "step": 39350 }, { "epoch": 3.44, "learning_rate": 3.2810725827583196e-05, "loss": 0.9294, "step": 39360 }, { "epoch": 3.44, "learning_rate": 3.280635863394183e-05, "loss": 0.8245, "step": 39370 }, { "epoch": 3.44, "learning_rate": 3.280199144030047e-05, "loss": 1.1549, "step": 39380 }, { "epoch": 3.44, "learning_rate": 3.2797624246659096e-05, "loss": 0.8841, "step": 39390 }, { "epoch": 3.44, "learning_rate": 3.2793257053017736e-05, "loss": 0.8597, "step": 39400 }, { "epoch": 3.44, "learning_rate": 3.278888985937636e-05, "loss": 0.917, "step": 39410 }, { "epoch": 3.44, "learning_rate": 3.2784522665735e-05, "loss": 0.8988, "step": 39420 }, { "epoch": 3.44, "learning_rate": 3.2780155472093636e-05, "loss": 0.8952, "step": 39430 }, { "epoch": 3.44, "learning_rate": 3.277578827845227e-05, "loss": 0.8713, "step": 39440 }, { "epoch": 3.45, "learning_rate": 3.27714210848109e-05, "loss": 0.7879, "step": 39450 }, { "epoch": 3.45, "learning_rate": 3.2767053891169535e-05, "loss": 0.8459, "step": 39460 }, { "epoch": 3.45, "learning_rate": 3.2762686697528175e-05, "loss": 0.9478, "step": 39470 }, { "epoch": 3.45, "learning_rate": 3.27583195038868e-05, "loss": 0.8545, "step": 39480 }, { "epoch": 3.45, "learning_rate": 3.275395231024544e-05, "loss": 0.7723, "step": 39490 }, { "epoch": 3.45, "learning_rate": 3.274958511660407e-05, "loss": 0.9551, "step": 39500 }, { "epoch": 3.45, "learning_rate": 3.274521792296271e-05, "loss": 0.867, "step": 39510 }, { "epoch": 3.45, "learning_rate": 3.2740850729321334e-05, "loss": 0.8125, "step": 39520 }, { "epoch": 3.45, "learning_rate": 3.2736483535679974e-05, "loss": 0.8484, "step": 39530 }, { "epoch": 3.45, "learning_rate": 3.273211634203861e-05, "loss": 0.8548, "step": 39540 }, { "epoch": 3.45, "learning_rate": 3.272774914839724e-05, "loss": 1.0267, "step": 39550 }, { "epoch": 3.46, "learning_rate": 3.272338195475588e-05, "loss": 0.8919, "step": 39560 }, { "epoch": 3.46, "learning_rate": 3.271901476111451e-05, "loss": 0.9334, "step": 39570 }, { "epoch": 3.46, "learning_rate": 3.271464756747315e-05, "loss": 0.8413, "step": 39580 }, { "epoch": 3.46, "learning_rate": 3.2710280373831774e-05, "loss": 0.8609, "step": 39590 }, { "epoch": 3.46, "learning_rate": 3.2705913180190414e-05, "loss": 0.9368, "step": 39600 }, { "epoch": 3.46, "learning_rate": 3.270154598654904e-05, "loss": 0.936, "step": 39610 }, { "epoch": 3.46, "learning_rate": 3.269717879290768e-05, "loss": 0.9436, "step": 39620 }, { "epoch": 3.46, "learning_rate": 3.269281159926631e-05, "loss": 0.8715, "step": 39630 }, { "epoch": 3.46, "learning_rate": 3.2688444405624946e-05, "loss": 0.8315, "step": 39640 }, { "epoch": 3.46, "learning_rate": 3.268407721198358e-05, "loss": 0.796, "step": 39650 }, { "epoch": 3.46, "learning_rate": 3.267971001834221e-05, "loss": 0.8514, "step": 39660 }, { "epoch": 3.46, "learning_rate": 3.267534282470085e-05, "loss": 0.9298, "step": 39670 }, { "epoch": 3.47, "learning_rate": 3.267097563105948e-05, "loss": 0.9868, "step": 39680 }, { "epoch": 3.47, "learning_rate": 3.266660843741812e-05, "loss": 0.8787, "step": 39690 }, { "epoch": 3.47, "learning_rate": 3.266224124377675e-05, "loss": 0.8319, "step": 39700 }, { "epoch": 3.47, "learning_rate": 3.2657874050135386e-05, "loss": 0.8805, "step": 39710 }, { "epoch": 3.47, "learning_rate": 3.265350685649402e-05, "loss": 0.9283, "step": 39720 }, { "epoch": 3.47, "learning_rate": 3.264913966285265e-05, "loss": 0.8665, "step": 39730 }, { "epoch": 3.47, "learning_rate": 3.2644772469211285e-05, "loss": 0.886, "step": 39740 }, { "epoch": 3.47, "learning_rate": 3.264040527556992e-05, "loss": 0.9228, "step": 39750 }, { "epoch": 3.47, "learning_rate": 3.263603808192856e-05, "loss": 1.0164, "step": 39760 }, { "epoch": 3.47, "learning_rate": 3.2631670888287185e-05, "loss": 0.8293, "step": 39770 }, { "epoch": 3.47, "learning_rate": 3.2627303694645825e-05, "loss": 0.8743, "step": 39780 }, { "epoch": 3.48, "learning_rate": 3.262293650100446e-05, "loss": 0.917, "step": 39790 }, { "epoch": 3.48, "learning_rate": 3.261856930736309e-05, "loss": 0.9465, "step": 39800 }, { "epoch": 3.48, "learning_rate": 3.2614202113721724e-05, "loss": 0.8184, "step": 39810 }, { "epoch": 3.48, "learning_rate": 3.260983492008036e-05, "loss": 0.8406, "step": 39820 }, { "epoch": 3.48, "learning_rate": 3.260546772643899e-05, "loss": 0.8139, "step": 39830 }, { "epoch": 3.48, "learning_rate": 3.2601100532797624e-05, "loss": 0.7886, "step": 39840 }, { "epoch": 3.48, "learning_rate": 3.259673333915626e-05, "loss": 0.7649, "step": 39850 }, { "epoch": 3.48, "learning_rate": 3.259236614551489e-05, "loss": 0.8103, "step": 39860 }, { "epoch": 3.48, "learning_rate": 3.258799895187353e-05, "loss": 0.8564, "step": 39870 }, { "epoch": 3.48, "learning_rate": 3.2583631758232164e-05, "loss": 0.8123, "step": 39880 }, { "epoch": 3.48, "learning_rate": 3.25792645645908e-05, "loss": 1.0089, "step": 39890 }, { "epoch": 3.49, "learning_rate": 3.257489737094943e-05, "loss": 0.8636, "step": 39900 }, { "epoch": 3.49, "learning_rate": 3.257053017730806e-05, "loss": 0.8431, "step": 39910 }, { "epoch": 3.49, "learning_rate": 3.2566162983666696e-05, "loss": 0.8657, "step": 39920 }, { "epoch": 3.49, "learning_rate": 3.256179579002533e-05, "loss": 0.8181, "step": 39930 }, { "epoch": 3.49, "learning_rate": 3.255742859638396e-05, "loss": 0.8357, "step": 39940 }, { "epoch": 3.49, "learning_rate": 3.25530614027426e-05, "loss": 0.9168, "step": 39950 }, { "epoch": 3.49, "learning_rate": 3.2548694209101236e-05, "loss": 0.9096, "step": 39960 }, { "epoch": 3.49, "learning_rate": 3.254432701545987e-05, "loss": 0.9179, "step": 39970 }, { "epoch": 3.49, "learning_rate": 3.25399598218185e-05, "loss": 0.9418, "step": 39980 }, { "epoch": 3.49, "learning_rate": 3.2535592628177136e-05, "loss": 0.8835, "step": 39990 }, { "epoch": 3.49, "learning_rate": 3.253122543453577e-05, "loss": 0.8974, "step": 40000 }, { "epoch": 3.49, "eval_accuracy": 0.5760047165176378, "eval_loss": 0.8895514011383057, "eval_runtime": 84.0745, "eval_samples_per_second": 121.047, "eval_steps_per_second": 15.141, "step": 40000 }, { "epoch": 3.49, "learning_rate": 3.25268582408944e-05, "loss": 0.9235, "step": 40010 }, { "epoch": 3.5, "learning_rate": 3.2522491047253035e-05, "loss": 0.8086, "step": 40020 }, { "epoch": 3.5, "learning_rate": 3.251812385361167e-05, "loss": 0.8962, "step": 40030 }, { "epoch": 3.5, "learning_rate": 3.251375665997031e-05, "loss": 0.7805, "step": 40040 }, { "epoch": 3.5, "learning_rate": 3.2509389466328935e-05, "loss": 0.8585, "step": 40050 }, { "epoch": 3.5, "learning_rate": 3.2505022272687575e-05, "loss": 0.9422, "step": 40060 }, { "epoch": 3.5, "learning_rate": 3.250065507904621e-05, "loss": 0.8968, "step": 40070 }, { "epoch": 3.5, "learning_rate": 3.249628788540484e-05, "loss": 0.8883, "step": 40080 }, { "epoch": 3.5, "learning_rate": 3.2491920691763474e-05, "loss": 0.8849, "step": 40090 }, { "epoch": 3.5, "learning_rate": 3.248755349812211e-05, "loss": 0.8743, "step": 40100 }, { "epoch": 3.5, "learning_rate": 3.248318630448075e-05, "loss": 0.9298, "step": 40110 }, { "epoch": 3.5, "learning_rate": 3.2478819110839374e-05, "loss": 0.8593, "step": 40120 }, { "epoch": 3.51, "learning_rate": 3.2474451917198014e-05, "loss": 0.9565, "step": 40130 }, { "epoch": 3.51, "learning_rate": 3.247008472355664e-05, "loss": 0.938, "step": 40140 }, { "epoch": 3.51, "learning_rate": 3.246571752991528e-05, "loss": 0.9469, "step": 40150 }, { "epoch": 3.51, "learning_rate": 3.2461350336273914e-05, "loss": 0.8404, "step": 40160 }, { "epoch": 3.51, "learning_rate": 3.245698314263255e-05, "loss": 0.8926, "step": 40170 }, { "epoch": 3.51, "learning_rate": 3.245261594899118e-05, "loss": 0.8676, "step": 40180 }, { "epoch": 3.51, "learning_rate": 3.244824875534981e-05, "loss": 0.9988, "step": 40190 }, { "epoch": 3.51, "learning_rate": 3.244388156170845e-05, "loss": 0.8865, "step": 40200 }, { "epoch": 3.51, "learning_rate": 3.243951436806708e-05, "loss": 0.8964, "step": 40210 }, { "epoch": 3.51, "learning_rate": 3.243514717442572e-05, "loss": 0.8868, "step": 40220 }, { "epoch": 3.51, "learning_rate": 3.2430779980784346e-05, "loss": 0.9154, "step": 40230 }, { "epoch": 3.51, "learning_rate": 3.2426412787142986e-05, "loss": 0.792, "step": 40240 }, { "epoch": 3.52, "learning_rate": 3.242204559350161e-05, "loss": 0.8183, "step": 40250 }, { "epoch": 3.52, "learning_rate": 3.241767839986025e-05, "loss": 1.0211, "step": 40260 }, { "epoch": 3.52, "learning_rate": 3.2413311206218886e-05, "loss": 0.7967, "step": 40270 }, { "epoch": 3.52, "learning_rate": 3.240894401257752e-05, "loss": 0.8736, "step": 40280 }, { "epoch": 3.52, "learning_rate": 3.240457681893616e-05, "loss": 0.8718, "step": 40290 }, { "epoch": 3.52, "learning_rate": 3.2400209625294785e-05, "loss": 0.9143, "step": 40300 }, { "epoch": 3.52, "learning_rate": 3.2395842431653425e-05, "loss": 0.8955, "step": 40310 }, { "epoch": 3.52, "learning_rate": 3.239147523801205e-05, "loss": 0.9374, "step": 40320 }, { "epoch": 3.52, "learning_rate": 3.238710804437069e-05, "loss": 1.0103, "step": 40330 }, { "epoch": 3.52, "learning_rate": 3.238274085072932e-05, "loss": 0.8145, "step": 40340 }, { "epoch": 3.52, "learning_rate": 3.237837365708796e-05, "loss": 0.886, "step": 40350 }, { "epoch": 3.53, "learning_rate": 3.237400646344659e-05, "loss": 0.9587, "step": 40360 }, { "epoch": 3.53, "learning_rate": 3.2369639269805224e-05, "loss": 0.8753, "step": 40370 }, { "epoch": 3.53, "learning_rate": 3.236527207616386e-05, "loss": 0.8139, "step": 40380 }, { "epoch": 3.53, "learning_rate": 3.236090488252249e-05, "loss": 0.9461, "step": 40390 }, { "epoch": 3.53, "learning_rate": 3.235653768888113e-05, "loss": 0.8716, "step": 40400 }, { "epoch": 3.53, "learning_rate": 3.235217049523976e-05, "loss": 0.8677, "step": 40410 }, { "epoch": 3.53, "learning_rate": 3.23478033015984e-05, "loss": 0.8765, "step": 40420 }, { "epoch": 3.53, "learning_rate": 3.2343436107957024e-05, "loss": 0.9177, "step": 40430 }, { "epoch": 3.53, "learning_rate": 3.2339068914315664e-05, "loss": 0.9075, "step": 40440 }, { "epoch": 3.53, "learning_rate": 3.23347017206743e-05, "loss": 0.9258, "step": 40450 }, { "epoch": 3.53, "learning_rate": 3.233033452703293e-05, "loss": 0.8783, "step": 40460 }, { "epoch": 3.53, "learning_rate": 3.232596733339156e-05, "loss": 0.8053, "step": 40470 }, { "epoch": 3.54, "learning_rate": 3.2321600139750196e-05, "loss": 0.9556, "step": 40480 }, { "epoch": 3.54, "learning_rate": 3.2317232946108836e-05, "loss": 0.95, "step": 40490 }, { "epoch": 3.54, "learning_rate": 3.231286575246746e-05, "loss": 0.9327, "step": 40500 }, { "epoch": 3.54, "learning_rate": 3.23084985588261e-05, "loss": 0.862, "step": 40510 }, { "epoch": 3.54, "learning_rate": 3.2304131365184736e-05, "loss": 0.8557, "step": 40520 }, { "epoch": 3.54, "learning_rate": 3.229976417154337e-05, "loss": 0.8145, "step": 40530 }, { "epoch": 3.54, "learning_rate": 3.2295396977902e-05, "loss": 0.9271, "step": 40540 }, { "epoch": 3.54, "learning_rate": 3.2291029784260636e-05, "loss": 0.9678, "step": 40550 }, { "epoch": 3.54, "learning_rate": 3.228666259061927e-05, "loss": 0.9232, "step": 40560 }, { "epoch": 3.54, "learning_rate": 3.22822953969779e-05, "loss": 0.8396, "step": 40570 }, { "epoch": 3.54, "learning_rate": 3.2277928203336535e-05, "loss": 0.9568, "step": 40580 }, { "epoch": 3.55, "learning_rate": 3.227356100969517e-05, "loss": 0.8446, "step": 40590 }, { "epoch": 3.55, "learning_rate": 3.226919381605381e-05, "loss": 0.9504, "step": 40600 }, { "epoch": 3.55, "learning_rate": 3.226482662241244e-05, "loss": 0.8994, "step": 40610 }, { "epoch": 3.55, "learning_rate": 3.2260459428771075e-05, "loss": 0.8837, "step": 40620 }, { "epoch": 3.55, "learning_rate": 3.225609223512971e-05, "loss": 0.9213, "step": 40630 }, { "epoch": 3.55, "learning_rate": 3.225172504148834e-05, "loss": 0.797, "step": 40640 }, { "epoch": 3.55, "learning_rate": 3.2247357847846974e-05, "loss": 0.9796, "step": 40650 }, { "epoch": 3.55, "learning_rate": 3.224299065420561e-05, "loss": 0.9047, "step": 40660 }, { "epoch": 3.55, "learning_rate": 3.223862346056424e-05, "loss": 0.9607, "step": 40670 }, { "epoch": 3.55, "learning_rate": 3.2234256266922874e-05, "loss": 0.7115, "step": 40680 }, { "epoch": 3.55, "learning_rate": 3.2229889073281514e-05, "loss": 0.8762, "step": 40690 }, { "epoch": 3.55, "learning_rate": 3.222552187964015e-05, "loss": 0.9666, "step": 40700 }, { "epoch": 3.56, "learning_rate": 3.222115468599878e-05, "loss": 0.7996, "step": 40710 }, { "epoch": 3.56, "learning_rate": 3.2216787492357414e-05, "loss": 0.8585, "step": 40720 }, { "epoch": 3.56, "learning_rate": 3.221242029871605e-05, "loss": 0.7867, "step": 40730 }, { "epoch": 3.56, "learning_rate": 3.220805310507468e-05, "loss": 0.833, "step": 40740 }, { "epoch": 3.56, "learning_rate": 3.220368591143331e-05, "loss": 0.8235, "step": 40750 }, { "epoch": 3.56, "learning_rate": 3.2199318717791946e-05, "loss": 0.979, "step": 40760 }, { "epoch": 3.56, "learning_rate": 3.2194951524150586e-05, "loss": 0.8592, "step": 40770 }, { "epoch": 3.56, "learning_rate": 3.219058433050921e-05, "loss": 0.8389, "step": 40780 }, { "epoch": 3.56, "learning_rate": 3.218621713686785e-05, "loss": 0.9017, "step": 40790 }, { "epoch": 3.56, "learning_rate": 3.2181849943226486e-05, "loss": 0.819, "step": 40800 }, { "epoch": 3.56, "learning_rate": 3.217748274958512e-05, "loss": 0.8763, "step": 40810 }, { "epoch": 3.57, "learning_rate": 3.217311555594375e-05, "loss": 0.8823, "step": 40820 }, { "epoch": 3.57, "learning_rate": 3.2168748362302386e-05, "loss": 0.8739, "step": 40830 }, { "epoch": 3.57, "learning_rate": 3.216438116866102e-05, "loss": 0.8904, "step": 40840 }, { "epoch": 3.57, "learning_rate": 3.216001397501965e-05, "loss": 0.8149, "step": 40850 }, { "epoch": 3.57, "learning_rate": 3.215564678137829e-05, "loss": 0.9075, "step": 40860 }, { "epoch": 3.57, "learning_rate": 3.215127958773692e-05, "loss": 0.7853, "step": 40870 }, { "epoch": 3.57, "learning_rate": 3.214691239409556e-05, "loss": 0.8349, "step": 40880 }, { "epoch": 3.57, "learning_rate": 3.214254520045419e-05, "loss": 0.7541, "step": 40890 }, { "epoch": 3.57, "learning_rate": 3.2138178006812825e-05, "loss": 0.8596, "step": 40900 }, { "epoch": 3.57, "learning_rate": 3.213381081317146e-05, "loss": 0.8424, "step": 40910 }, { "epoch": 3.57, "learning_rate": 3.212944361953009e-05, "loss": 0.8629, "step": 40920 }, { "epoch": 3.57, "learning_rate": 3.2125076425888724e-05, "loss": 0.9509, "step": 40930 }, { "epoch": 3.58, "learning_rate": 3.212070923224736e-05, "loss": 0.7917, "step": 40940 }, { "epoch": 3.58, "learning_rate": 3.2116342038606e-05, "loss": 0.8643, "step": 40950 }, { "epoch": 3.58, "learning_rate": 3.2111974844964624e-05, "loss": 0.7653, "step": 40960 }, { "epoch": 3.58, "learning_rate": 3.2107607651323264e-05, "loss": 0.9264, "step": 40970 }, { "epoch": 3.58, "learning_rate": 3.210324045768189e-05, "loss": 0.9201, "step": 40980 }, { "epoch": 3.58, "learning_rate": 3.209887326404053e-05, "loss": 0.9213, "step": 40990 }, { "epoch": 3.58, "learning_rate": 3.2094506070399164e-05, "loss": 0.9287, "step": 41000 }, { "epoch": 3.58, "learning_rate": 3.20901388767578e-05, "loss": 0.7385, "step": 41010 }, { "epoch": 3.58, "learning_rate": 3.208577168311644e-05, "loss": 0.9186, "step": 41020 }, { "epoch": 3.58, "learning_rate": 3.208140448947506e-05, "loss": 0.7156, "step": 41030 }, { "epoch": 3.58, "learning_rate": 3.20770372958337e-05, "loss": 0.8827, "step": 41040 }, { "epoch": 3.59, "learning_rate": 3.207267010219233e-05, "loss": 0.9183, "step": 41050 }, { "epoch": 3.59, "learning_rate": 3.206830290855097e-05, "loss": 1.0178, "step": 41060 }, { "epoch": 3.59, "learning_rate": 3.2063935714909596e-05, "loss": 0.8417, "step": 41070 }, { "epoch": 3.59, "learning_rate": 3.2059568521268236e-05, "loss": 0.8933, "step": 41080 }, { "epoch": 3.59, "learning_rate": 3.205520132762687e-05, "loss": 0.9257, "step": 41090 }, { "epoch": 3.59, "learning_rate": 3.20508341339855e-05, "loss": 0.924, "step": 41100 }, { "epoch": 3.59, "learning_rate": 3.2046466940344136e-05, "loss": 0.8461, "step": 41110 }, { "epoch": 3.59, "learning_rate": 3.204209974670277e-05, "loss": 0.8214, "step": 41120 }, { "epoch": 3.59, "learning_rate": 3.203773255306141e-05, "loss": 0.9961, "step": 41130 }, { "epoch": 3.59, "learning_rate": 3.2033365359420035e-05, "loss": 0.7417, "step": 41140 }, { "epoch": 3.59, "learning_rate": 3.2028998165778675e-05, "loss": 0.8967, "step": 41150 }, { "epoch": 3.6, "learning_rate": 3.20246309721373e-05, "loss": 0.971, "step": 41160 }, { "epoch": 3.6, "learning_rate": 3.202026377849594e-05, "loss": 0.8921, "step": 41170 }, { "epoch": 3.6, "learning_rate": 3.2015896584854575e-05, "loss": 0.9081, "step": 41180 }, { "epoch": 3.6, "learning_rate": 3.201152939121321e-05, "loss": 0.8223, "step": 41190 }, { "epoch": 3.6, "learning_rate": 3.200716219757184e-05, "loss": 0.9268, "step": 41200 }, { "epoch": 3.6, "learning_rate": 3.2002795003930474e-05, "loss": 0.8455, "step": 41210 }, { "epoch": 3.6, "learning_rate": 3.1998427810289114e-05, "loss": 0.6371, "step": 41220 }, { "epoch": 3.6, "learning_rate": 3.199406061664774e-05, "loss": 0.7945, "step": 41230 }, { "epoch": 3.6, "learning_rate": 3.198969342300638e-05, "loss": 0.8534, "step": 41240 }, { "epoch": 3.6, "learning_rate": 3.198532622936501e-05, "loss": 0.8177, "step": 41250 }, { "epoch": 3.6, "learning_rate": 3.198095903572365e-05, "loss": 0.8662, "step": 41260 }, { "epoch": 3.6, "learning_rate": 3.197659184208228e-05, "loss": 0.9095, "step": 41270 }, { "epoch": 3.61, "learning_rate": 3.1972224648440914e-05, "loss": 0.8298, "step": 41280 }, { "epoch": 3.61, "learning_rate": 3.196785745479955e-05, "loss": 0.9209, "step": 41290 }, { "epoch": 3.61, "learning_rate": 3.196349026115818e-05, "loss": 0.8777, "step": 41300 }, { "epoch": 3.61, "learning_rate": 3.195912306751681e-05, "loss": 0.8733, "step": 41310 }, { "epoch": 3.61, "learning_rate": 3.1954755873875446e-05, "loss": 0.8564, "step": 41320 }, { "epoch": 3.61, "learning_rate": 3.1950388680234086e-05, "loss": 0.8851, "step": 41330 }, { "epoch": 3.61, "learning_rate": 3.194602148659271e-05, "loss": 0.9355, "step": 41340 }, { "epoch": 3.61, "learning_rate": 3.194165429295135e-05, "loss": 0.8779, "step": 41350 }, { "epoch": 3.61, "learning_rate": 3.1937287099309986e-05, "loss": 1.0599, "step": 41360 }, { "epoch": 3.61, "learning_rate": 3.193291990566862e-05, "loss": 0.9608, "step": 41370 }, { "epoch": 3.61, "learning_rate": 3.192855271202725e-05, "loss": 0.8116, "step": 41380 }, { "epoch": 3.62, "learning_rate": 3.1924185518385886e-05, "loss": 0.994, "step": 41390 }, { "epoch": 3.62, "learning_rate": 3.191981832474452e-05, "loss": 0.9413, "step": 41400 }, { "epoch": 3.62, "learning_rate": 3.191545113110315e-05, "loss": 0.8724, "step": 41410 }, { "epoch": 3.62, "learning_rate": 3.191108393746179e-05, "loss": 0.7833, "step": 41420 }, { "epoch": 3.62, "learning_rate": 3.1906716743820425e-05, "loss": 0.6929, "step": 41430 }, { "epoch": 3.62, "learning_rate": 3.190234955017906e-05, "loss": 0.907, "step": 41440 }, { "epoch": 3.62, "learning_rate": 3.189798235653769e-05, "loss": 0.903, "step": 41450 }, { "epoch": 3.62, "learning_rate": 3.1893615162896325e-05, "loss": 0.7713, "step": 41460 }, { "epoch": 3.62, "learning_rate": 3.188924796925496e-05, "loss": 0.9696, "step": 41470 }, { "epoch": 3.62, "learning_rate": 3.188488077561359e-05, "loss": 0.9506, "step": 41480 }, { "epoch": 3.62, "learning_rate": 3.1880513581972224e-05, "loss": 0.8504, "step": 41490 }, { "epoch": 3.62, "learning_rate": 3.187614638833086e-05, "loss": 0.8821, "step": 41500 }, { "epoch": 3.63, "learning_rate": 3.187177919468949e-05, "loss": 0.9178, "step": 41510 }, { "epoch": 3.63, "learning_rate": 3.186741200104813e-05, "loss": 0.886, "step": 41520 }, { "epoch": 3.63, "learning_rate": 3.1863044807406764e-05, "loss": 0.9602, "step": 41530 }, { "epoch": 3.63, "learning_rate": 3.18586776137654e-05, "loss": 1.0601, "step": 41540 }, { "epoch": 3.63, "learning_rate": 3.185431042012403e-05, "loss": 0.7864, "step": 41550 }, { "epoch": 3.63, "learning_rate": 3.1849943226482664e-05, "loss": 0.9189, "step": 41560 }, { "epoch": 3.63, "learning_rate": 3.18455760328413e-05, "loss": 0.8994, "step": 41570 }, { "epoch": 3.63, "learning_rate": 3.184120883919993e-05, "loss": 0.8932, "step": 41580 }, { "epoch": 3.63, "learning_rate": 3.183684164555857e-05, "loss": 0.8959, "step": 41590 }, { "epoch": 3.63, "learning_rate": 3.1832474451917196e-05, "loss": 0.8341, "step": 41600 }, { "epoch": 3.63, "learning_rate": 3.1828107258275836e-05, "loss": 0.96, "step": 41610 }, { "epoch": 3.64, "learning_rate": 3.182374006463447e-05, "loss": 0.827, "step": 41620 }, { "epoch": 3.64, "learning_rate": 3.18193728709931e-05, "loss": 0.9071, "step": 41630 }, { "epoch": 3.64, "learning_rate": 3.1815005677351736e-05, "loss": 0.8698, "step": 41640 }, { "epoch": 3.64, "learning_rate": 3.181063848371037e-05, "loss": 0.8997, "step": 41650 }, { "epoch": 3.64, "learning_rate": 3.1806271290069e-05, "loss": 0.8201, "step": 41660 }, { "epoch": 3.64, "learning_rate": 3.1801904096427636e-05, "loss": 0.8702, "step": 41670 }, { "epoch": 3.64, "learning_rate": 3.1797536902786276e-05, "loss": 0.882, "step": 41680 }, { "epoch": 3.64, "learning_rate": 3.17931697091449e-05, "loss": 0.8048, "step": 41690 }, { "epoch": 3.64, "learning_rate": 3.178880251550354e-05, "loss": 0.9407, "step": 41700 }, { "epoch": 3.64, "learning_rate": 3.178443532186217e-05, "loss": 0.8538, "step": 41710 }, { "epoch": 3.64, "learning_rate": 3.178006812822081e-05, "loss": 0.8538, "step": 41720 }, { "epoch": 3.64, "learning_rate": 3.177570093457944e-05, "loss": 0.7926, "step": 41730 }, { "epoch": 3.65, "learning_rate": 3.1771333740938075e-05, "loss": 0.9532, "step": 41740 }, { "epoch": 3.65, "learning_rate": 3.176696654729671e-05, "loss": 1.036, "step": 41750 }, { "epoch": 3.65, "learning_rate": 3.176259935365534e-05, "loss": 0.8723, "step": 41760 }, { "epoch": 3.65, "learning_rate": 3.175823216001398e-05, "loss": 0.8232, "step": 41770 }, { "epoch": 3.65, "learning_rate": 3.175386496637261e-05, "loss": 0.7646, "step": 41780 }, { "epoch": 3.65, "learning_rate": 3.174949777273125e-05, "loss": 0.8248, "step": 41790 }, { "epoch": 3.65, "learning_rate": 3.1745130579089874e-05, "loss": 0.8064, "step": 41800 }, { "epoch": 3.65, "learning_rate": 3.1740763385448514e-05, "loss": 0.847, "step": 41810 }, { "epoch": 3.65, "learning_rate": 3.173639619180715e-05, "loss": 0.8946, "step": 41820 }, { "epoch": 3.65, "learning_rate": 3.173202899816578e-05, "loss": 0.8153, "step": 41830 }, { "epoch": 3.65, "learning_rate": 3.1727661804524414e-05, "loss": 0.825, "step": 41840 }, { "epoch": 3.66, "learning_rate": 3.172329461088305e-05, "loss": 0.9208, "step": 41850 }, { "epoch": 3.66, "learning_rate": 3.171892741724169e-05, "loss": 0.8873, "step": 41860 }, { "epoch": 3.66, "learning_rate": 3.171456022360031e-05, "loss": 0.8849, "step": 41870 }, { "epoch": 3.66, "learning_rate": 3.171019302995895e-05, "loss": 1.0493, "step": 41880 }, { "epoch": 3.66, "learning_rate": 3.170582583631758e-05, "loss": 0.9831, "step": 41890 }, { "epoch": 3.66, "learning_rate": 3.170145864267622e-05, "loss": 0.8834, "step": 41900 }, { "epoch": 3.66, "learning_rate": 3.1697091449034846e-05, "loss": 0.9159, "step": 41910 }, { "epoch": 3.66, "learning_rate": 3.1692724255393486e-05, "loss": 0.9835, "step": 41920 }, { "epoch": 3.66, "learning_rate": 3.168835706175212e-05, "loss": 0.8471, "step": 41930 }, { "epoch": 3.66, "learning_rate": 3.168398986811075e-05, "loss": 0.9271, "step": 41940 }, { "epoch": 3.66, "learning_rate": 3.167962267446939e-05, "loss": 0.8998, "step": 41950 }, { "epoch": 3.66, "learning_rate": 3.167525548082802e-05, "loss": 0.9886, "step": 41960 }, { "epoch": 3.67, "learning_rate": 3.167088828718666e-05, "loss": 0.9234, "step": 41970 }, { "epoch": 3.67, "learning_rate": 3.1666521093545285e-05, "loss": 0.9291, "step": 41980 }, { "epoch": 3.67, "learning_rate": 3.1662153899903925e-05, "loss": 0.8476, "step": 41990 }, { "epoch": 3.67, "learning_rate": 3.165778670626256e-05, "loss": 0.7756, "step": 42000 }, { "epoch": 3.67, "learning_rate": 3.165341951262119e-05, "loss": 0.8612, "step": 42010 }, { "epoch": 3.67, "learning_rate": 3.1649052318979825e-05, "loss": 0.8876, "step": 42020 }, { "epoch": 3.67, "learning_rate": 3.164468512533846e-05, "loss": 0.7563, "step": 42030 }, { "epoch": 3.67, "learning_rate": 3.164031793169709e-05, "loss": 0.7682, "step": 42040 }, { "epoch": 3.67, "learning_rate": 3.1635950738055724e-05, "loss": 0.9042, "step": 42050 }, { "epoch": 3.67, "learning_rate": 3.1631583544414364e-05, "loss": 0.8084, "step": 42060 }, { "epoch": 3.67, "learning_rate": 3.162721635077299e-05, "loss": 0.8315, "step": 42070 }, { "epoch": 3.68, "learning_rate": 3.162284915713163e-05, "loss": 0.9665, "step": 42080 }, { "epoch": 3.68, "learning_rate": 3.1618481963490264e-05, "loss": 0.897, "step": 42090 }, { "epoch": 3.68, "learning_rate": 3.16141147698489e-05, "loss": 0.7584, "step": 42100 }, { "epoch": 3.68, "learning_rate": 3.160974757620753e-05, "loss": 0.9097, "step": 42110 }, { "epoch": 3.68, "learning_rate": 3.1605380382566164e-05, "loss": 0.8609, "step": 42120 }, { "epoch": 3.68, "learning_rate": 3.16010131889248e-05, "loss": 1.025, "step": 42130 }, { "epoch": 3.68, "learning_rate": 3.159664599528343e-05, "loss": 0.9093, "step": 42140 }, { "epoch": 3.68, "learning_rate": 3.159227880164207e-05, "loss": 0.8803, "step": 42150 }, { "epoch": 3.68, "learning_rate": 3.1587911608000696e-05, "loss": 0.819, "step": 42160 }, { "epoch": 3.68, "learning_rate": 3.1583544414359336e-05, "loss": 0.8118, "step": 42170 }, { "epoch": 3.68, "learning_rate": 3.157917722071797e-05, "loss": 1.0162, "step": 42180 }, { "epoch": 3.69, "learning_rate": 3.15748100270766e-05, "loss": 0.8362, "step": 42190 }, { "epoch": 3.69, "learning_rate": 3.1570442833435236e-05, "loss": 0.9097, "step": 42200 }, { "epoch": 3.69, "learning_rate": 3.156607563979387e-05, "loss": 0.8227, "step": 42210 }, { "epoch": 3.69, "learning_rate": 3.15617084461525e-05, "loss": 0.8705, "step": 42220 }, { "epoch": 3.69, "learning_rate": 3.1557341252511136e-05, "loss": 0.7744, "step": 42230 }, { "epoch": 3.69, "learning_rate": 3.155297405886977e-05, "loss": 0.9444, "step": 42240 }, { "epoch": 3.69, "learning_rate": 3.154860686522841e-05, "loss": 0.9756, "step": 42250 }, { "epoch": 3.69, "learning_rate": 3.154423967158704e-05, "loss": 0.9315, "step": 42260 }, { "epoch": 3.69, "learning_rate": 3.1539872477945675e-05, "loss": 0.8431, "step": 42270 }, { "epoch": 3.69, "learning_rate": 3.153550528430431e-05, "loss": 0.946, "step": 42280 }, { "epoch": 3.69, "learning_rate": 3.153113809066294e-05, "loss": 0.8241, "step": 42290 }, { "epoch": 3.69, "learning_rate": 3.1526770897021575e-05, "loss": 0.7881, "step": 42300 }, { "epoch": 3.7, "learning_rate": 3.152240370338021e-05, "loss": 0.8683, "step": 42310 }, { "epoch": 3.7, "learning_rate": 3.151803650973884e-05, "loss": 0.8973, "step": 42320 }, { "epoch": 3.7, "learning_rate": 3.1513669316097474e-05, "loss": 0.8281, "step": 42330 }, { "epoch": 3.7, "learning_rate": 3.1509302122456114e-05, "loss": 0.9275, "step": 42340 }, { "epoch": 3.7, "learning_rate": 3.150493492881475e-05, "loss": 1.0208, "step": 42350 }, { "epoch": 3.7, "learning_rate": 3.150056773517338e-05, "loss": 0.8211, "step": 42360 }, { "epoch": 3.7, "learning_rate": 3.1496200541532014e-05, "loss": 0.9102, "step": 42370 }, { "epoch": 3.7, "learning_rate": 3.149183334789065e-05, "loss": 0.9167, "step": 42380 }, { "epoch": 3.7, "learning_rate": 3.148746615424928e-05, "loss": 0.9598, "step": 42390 }, { "epoch": 3.7, "learning_rate": 3.1483098960607914e-05, "loss": 0.8485, "step": 42400 }, { "epoch": 3.7, "learning_rate": 3.1478731766966554e-05, "loss": 0.9151, "step": 42410 }, { "epoch": 3.71, "learning_rate": 3.147436457332518e-05, "loss": 0.8958, "step": 42420 }, { "epoch": 3.71, "learning_rate": 3.146999737968382e-05, "loss": 0.8962, "step": 42430 }, { "epoch": 3.71, "learning_rate": 3.1465630186042446e-05, "loss": 0.8316, "step": 42440 }, { "epoch": 3.71, "learning_rate": 3.1461262992401086e-05, "loss": 0.8716, "step": 42450 }, { "epoch": 3.71, "learning_rate": 3.145689579875972e-05, "loss": 0.8146, "step": 42460 }, { "epoch": 3.71, "learning_rate": 3.145252860511835e-05, "loss": 0.8882, "step": 42470 }, { "epoch": 3.71, "learning_rate": 3.1448161411476986e-05, "loss": 0.8491, "step": 42480 }, { "epoch": 3.71, "learning_rate": 3.144379421783562e-05, "loss": 0.8211, "step": 42490 }, { "epoch": 3.71, "learning_rate": 3.143942702419426e-05, "loss": 0.9726, "step": 42500 }, { "epoch": 3.71, "learning_rate": 3.1435059830552886e-05, "loss": 0.8885, "step": 42510 }, { "epoch": 3.71, "learning_rate": 3.1430692636911526e-05, "loss": 0.8604, "step": 42520 }, { "epoch": 3.71, "learning_rate": 3.142632544327015e-05, "loss": 0.8774, "step": 42530 }, { "epoch": 3.72, "learning_rate": 3.142195824962879e-05, "loss": 0.935, "step": 42540 }, { "epoch": 3.72, "learning_rate": 3.1417591055987425e-05, "loss": 0.822, "step": 42550 }, { "epoch": 3.72, "learning_rate": 3.141322386234606e-05, "loss": 0.8608, "step": 42560 }, { "epoch": 3.72, "learning_rate": 3.140885666870469e-05, "loss": 0.8284, "step": 42570 }, { "epoch": 3.72, "learning_rate": 3.1404489475063325e-05, "loss": 0.8469, "step": 42580 }, { "epoch": 3.72, "learning_rate": 3.1400122281421965e-05, "loss": 0.9802, "step": 42590 }, { "epoch": 3.72, "learning_rate": 3.139575508778059e-05, "loss": 0.9199, "step": 42600 }, { "epoch": 3.72, "learning_rate": 3.139138789413923e-05, "loss": 0.897, "step": 42610 }, { "epoch": 3.72, "learning_rate": 3.138702070049786e-05, "loss": 0.7278, "step": 42620 }, { "epoch": 3.72, "learning_rate": 3.13826535068565e-05, "loss": 0.8469, "step": 42630 }, { "epoch": 3.72, "learning_rate": 3.1378286313215124e-05, "loss": 0.9664, "step": 42640 }, { "epoch": 3.73, "learning_rate": 3.1373919119573764e-05, "loss": 0.8784, "step": 42650 }, { "epoch": 3.73, "learning_rate": 3.13695519259324e-05, "loss": 0.8639, "step": 42660 }, { "epoch": 3.73, "learning_rate": 3.136518473229103e-05, "loss": 0.8294, "step": 42670 }, { "epoch": 3.73, "learning_rate": 3.136081753864967e-05, "loss": 0.957, "step": 42680 }, { "epoch": 3.73, "learning_rate": 3.13564503450083e-05, "loss": 0.8611, "step": 42690 }, { "epoch": 3.73, "learning_rate": 3.135208315136694e-05, "loss": 0.8494, "step": 42700 }, { "epoch": 3.73, "learning_rate": 3.134771595772556e-05, "loss": 1.0271, "step": 42710 }, { "epoch": 3.73, "learning_rate": 3.13433487640842e-05, "loss": 0.8439, "step": 42720 }, { "epoch": 3.73, "learning_rate": 3.133898157044283e-05, "loss": 1.0093, "step": 42730 }, { "epoch": 3.73, "learning_rate": 3.133461437680147e-05, "loss": 0.8798, "step": 42740 }, { "epoch": 3.73, "learning_rate": 3.13302471831601e-05, "loss": 0.854, "step": 42750 }, { "epoch": 3.73, "learning_rate": 3.1325879989518736e-05, "loss": 0.8572, "step": 42760 }, { "epoch": 3.74, "learning_rate": 3.132151279587737e-05, "loss": 0.9091, "step": 42770 }, { "epoch": 3.74, "learning_rate": 3.1317145602236e-05, "loss": 0.9104, "step": 42780 }, { "epoch": 3.74, "learning_rate": 3.131277840859464e-05, "loss": 0.8756, "step": 42790 }, { "epoch": 3.74, "learning_rate": 3.130841121495327e-05, "loss": 0.9232, "step": 42800 }, { "epoch": 3.74, "learning_rate": 3.130404402131191e-05, "loss": 0.9385, "step": 42810 }, { "epoch": 3.74, "learning_rate": 3.1299676827670535e-05, "loss": 0.8882, "step": 42820 }, { "epoch": 3.74, "learning_rate": 3.1295309634029175e-05, "loss": 0.8354, "step": 42830 }, { "epoch": 3.74, "learning_rate": 3.129094244038781e-05, "loss": 0.9209, "step": 42840 }, { "epoch": 3.74, "learning_rate": 3.128657524674644e-05, "loss": 0.9238, "step": 42850 }, { "epoch": 3.74, "learning_rate": 3.1282208053105075e-05, "loss": 0.8358, "step": 42860 }, { "epoch": 3.74, "learning_rate": 3.127784085946371e-05, "loss": 0.9722, "step": 42870 }, { "epoch": 3.75, "learning_rate": 3.127347366582235e-05, "loss": 0.8603, "step": 42880 }, { "epoch": 3.75, "learning_rate": 3.1269106472180974e-05, "loss": 1.0037, "step": 42890 }, { "epoch": 3.75, "learning_rate": 3.1264739278539614e-05, "loss": 0.8785, "step": 42900 }, { "epoch": 3.75, "learning_rate": 3.126037208489825e-05, "loss": 1.0532, "step": 42910 }, { "epoch": 3.75, "learning_rate": 3.125600489125688e-05, "loss": 0.7886, "step": 42920 }, { "epoch": 3.75, "learning_rate": 3.1251637697615514e-05, "loss": 0.8847, "step": 42930 }, { "epoch": 3.75, "learning_rate": 3.124727050397415e-05, "loss": 0.8906, "step": 42940 }, { "epoch": 3.75, "learning_rate": 3.124290331033278e-05, "loss": 0.9062, "step": 42950 }, { "epoch": 3.75, "learning_rate": 3.1238536116691414e-05, "loss": 0.9387, "step": 42960 }, { "epoch": 3.75, "learning_rate": 3.123416892305005e-05, "loss": 0.8708, "step": 42970 }, { "epoch": 3.75, "learning_rate": 3.122980172940868e-05, "loss": 0.7349, "step": 42980 }, { "epoch": 3.75, "learning_rate": 3.122543453576732e-05, "loss": 0.9949, "step": 42990 }, { "epoch": 3.76, "learning_rate": 3.122106734212595e-05, "loss": 0.8453, "step": 43000 }, { "epoch": 3.76, "learning_rate": 3.1216700148484586e-05, "loss": 0.7873, "step": 43010 }, { "epoch": 3.76, "learning_rate": 3.121233295484322e-05, "loss": 0.8589, "step": 43020 }, { "epoch": 3.76, "learning_rate": 3.120796576120185e-05, "loss": 0.8144, "step": 43030 }, { "epoch": 3.76, "learning_rate": 3.1203598567560486e-05, "loss": 0.8673, "step": 43040 }, { "epoch": 3.76, "learning_rate": 3.119923137391912e-05, "loss": 0.8469, "step": 43050 }, { "epoch": 3.76, "learning_rate": 3.119486418027775e-05, "loss": 0.8125, "step": 43060 }, { "epoch": 3.76, "learning_rate": 3.119049698663639e-05, "loss": 0.8196, "step": 43070 }, { "epoch": 3.76, "learning_rate": 3.1186129792995026e-05, "loss": 0.9286, "step": 43080 }, { "epoch": 3.76, "learning_rate": 3.118176259935366e-05, "loss": 0.8064, "step": 43090 }, { "epoch": 3.76, "learning_rate": 3.117739540571229e-05, "loss": 0.9786, "step": 43100 }, { "epoch": 3.77, "learning_rate": 3.1173028212070925e-05, "loss": 0.8825, "step": 43110 }, { "epoch": 3.77, "learning_rate": 3.116866101842956e-05, "loss": 0.8599, "step": 43120 }, { "epoch": 3.77, "learning_rate": 3.116429382478819e-05, "loss": 0.9517, "step": 43130 }, { "epoch": 3.77, "learning_rate": 3.1159926631146825e-05, "loss": 0.7728, "step": 43140 }, { "epoch": 3.77, "learning_rate": 3.115555943750546e-05, "loss": 0.945, "step": 43150 }, { "epoch": 3.77, "learning_rate": 3.11511922438641e-05, "loss": 0.9433, "step": 43160 }, { "epoch": 3.77, "learning_rate": 3.1146825050222724e-05, "loss": 0.9269, "step": 43170 }, { "epoch": 3.77, "learning_rate": 3.1142457856581364e-05, "loss": 0.8254, "step": 43180 }, { "epoch": 3.77, "learning_rate": 3.113809066294e-05, "loss": 0.9547, "step": 43190 }, { "epoch": 3.77, "learning_rate": 3.113372346929863e-05, "loss": 0.8691, "step": 43200 }, { "epoch": 3.77, "learning_rate": 3.1129356275657264e-05, "loss": 1.0065, "step": 43210 }, { "epoch": 3.78, "learning_rate": 3.11249890820159e-05, "loss": 0.8713, "step": 43220 }, { "epoch": 3.78, "learning_rate": 3.112062188837453e-05, "loss": 0.9444, "step": 43230 }, { "epoch": 3.78, "learning_rate": 3.1116254694733164e-05, "loss": 0.8744, "step": 43240 }, { "epoch": 3.78, "learning_rate": 3.1111887501091804e-05, "loss": 0.8044, "step": 43250 }, { "epoch": 3.78, "learning_rate": 3.110752030745043e-05, "loss": 0.8986, "step": 43260 }, { "epoch": 3.78, "learning_rate": 3.110315311380907e-05, "loss": 0.9637, "step": 43270 }, { "epoch": 3.78, "learning_rate": 3.10987859201677e-05, "loss": 0.8223, "step": 43280 }, { "epoch": 3.78, "learning_rate": 3.1094418726526336e-05, "loss": 0.9524, "step": 43290 }, { "epoch": 3.78, "learning_rate": 3.109005153288497e-05, "loss": 0.9054, "step": 43300 }, { "epoch": 3.78, "learning_rate": 3.10856843392436e-05, "loss": 0.8869, "step": 43310 }, { "epoch": 3.78, "learning_rate": 3.108131714560224e-05, "loss": 0.8938, "step": 43320 }, { "epoch": 3.78, "learning_rate": 3.107694995196087e-05, "loss": 0.8972, "step": 43330 }, { "epoch": 3.79, "learning_rate": 3.107258275831951e-05, "loss": 0.7384, "step": 43340 }, { "epoch": 3.79, "learning_rate": 3.1068215564678136e-05, "loss": 0.8034, "step": 43350 }, { "epoch": 3.79, "learning_rate": 3.1063848371036776e-05, "loss": 0.8361, "step": 43360 }, { "epoch": 3.79, "learning_rate": 3.10594811773954e-05, "loss": 0.8795, "step": 43370 }, { "epoch": 3.79, "learning_rate": 3.105511398375404e-05, "loss": 0.864, "step": 43380 }, { "epoch": 3.79, "learning_rate": 3.1050746790112675e-05, "loss": 0.7625, "step": 43390 }, { "epoch": 3.79, "learning_rate": 3.104637959647131e-05, "loss": 1.0542, "step": 43400 }, { "epoch": 3.79, "learning_rate": 3.104201240282995e-05, "loss": 0.8832, "step": 43410 }, { "epoch": 3.79, "learning_rate": 3.1037645209188575e-05, "loss": 0.8832, "step": 43420 }, { "epoch": 3.79, "learning_rate": 3.1033278015547215e-05, "loss": 0.8357, "step": 43430 }, { "epoch": 3.79, "learning_rate": 3.102891082190584e-05, "loss": 0.8637, "step": 43440 }, { "epoch": 3.8, "learning_rate": 3.102454362826448e-05, "loss": 0.8271, "step": 43450 }, { "epoch": 3.8, "learning_rate": 3.102017643462311e-05, "loss": 0.82, "step": 43460 }, { "epoch": 3.8, "learning_rate": 3.101580924098175e-05, "loss": 0.9699, "step": 43470 }, { "epoch": 3.8, "learning_rate": 3.101144204734038e-05, "loss": 0.8732, "step": 43480 }, { "epoch": 3.8, "learning_rate": 3.1007074853699014e-05, "loss": 0.8701, "step": 43490 }, { "epoch": 3.8, "learning_rate": 3.100270766005765e-05, "loss": 0.8537, "step": 43500 }, { "epoch": 3.8, "learning_rate": 3.099834046641628e-05, "loss": 0.9252, "step": 43510 }, { "epoch": 3.8, "learning_rate": 3.099397327277492e-05, "loss": 0.8445, "step": 43520 }, { "epoch": 3.8, "learning_rate": 3.098960607913355e-05, "loss": 0.8644, "step": 43530 }, { "epoch": 3.8, "learning_rate": 3.098523888549219e-05, "loss": 0.7856, "step": 43540 }, { "epoch": 3.8, "learning_rate": 3.098087169185081e-05, "loss": 0.8657, "step": 43550 }, { "epoch": 3.8, "learning_rate": 3.097650449820945e-05, "loss": 0.9483, "step": 43560 }, { "epoch": 3.81, "learning_rate": 3.0972137304568086e-05, "loss": 0.8654, "step": 43570 }, { "epoch": 3.81, "learning_rate": 3.096777011092672e-05, "loss": 0.9197, "step": 43580 }, { "epoch": 3.81, "learning_rate": 3.096340291728535e-05, "loss": 1.0204, "step": 43590 }, { "epoch": 3.81, "learning_rate": 3.0959035723643986e-05, "loss": 0.9363, "step": 43600 }, { "epoch": 3.81, "learning_rate": 3.0954668530002626e-05, "loss": 0.9304, "step": 43610 }, { "epoch": 3.81, "learning_rate": 3.095030133636125e-05, "loss": 0.9158, "step": 43620 }, { "epoch": 3.81, "learning_rate": 3.094593414271989e-05, "loss": 0.7709, "step": 43630 }, { "epoch": 3.81, "learning_rate": 3.094156694907852e-05, "loss": 0.9934, "step": 43640 }, { "epoch": 3.81, "learning_rate": 3.093719975543716e-05, "loss": 0.98, "step": 43650 }, { "epoch": 3.81, "learning_rate": 3.093283256179579e-05, "loss": 0.8976, "step": 43660 }, { "epoch": 3.81, "learning_rate": 3.0928465368154425e-05, "loss": 0.8752, "step": 43670 }, { "epoch": 3.82, "learning_rate": 3.092409817451306e-05, "loss": 0.8025, "step": 43680 }, { "epoch": 3.82, "learning_rate": 3.091973098087169e-05, "loss": 0.8493, "step": 43690 }, { "epoch": 3.82, "learning_rate": 3.0915363787230325e-05, "loss": 0.9826, "step": 43700 }, { "epoch": 3.82, "learning_rate": 3.091099659358896e-05, "loss": 0.8446, "step": 43710 }, { "epoch": 3.82, "learning_rate": 3.09066293999476e-05, "loss": 0.9638, "step": 43720 }, { "epoch": 3.82, "learning_rate": 3.090226220630623e-05, "loss": 0.8279, "step": 43730 }, { "epoch": 3.82, "learning_rate": 3.0897895012664864e-05, "loss": 0.9504, "step": 43740 }, { "epoch": 3.82, "learning_rate": 3.08935278190235e-05, "loss": 0.9174, "step": 43750 }, { "epoch": 3.82, "learning_rate": 3.088916062538213e-05, "loss": 0.9312, "step": 43760 }, { "epoch": 3.82, "learning_rate": 3.0884793431740764e-05, "loss": 0.9617, "step": 43770 }, { "epoch": 3.82, "learning_rate": 3.08804262380994e-05, "loss": 0.8582, "step": 43780 }, { "epoch": 3.82, "learning_rate": 3.087605904445803e-05, "loss": 0.9046, "step": 43790 }, { "epoch": 3.83, "learning_rate": 3.0871691850816664e-05, "loss": 0.8916, "step": 43800 }, { "epoch": 3.83, "learning_rate": 3.0867324657175304e-05, "loss": 0.8075, "step": 43810 }, { "epoch": 3.83, "learning_rate": 3.086295746353394e-05, "loss": 0.8718, "step": 43820 }, { "epoch": 3.83, "learning_rate": 3.085859026989257e-05, "loss": 0.912, "step": 43830 }, { "epoch": 3.83, "learning_rate": 3.08542230762512e-05, "loss": 0.9202, "step": 43840 }, { "epoch": 3.83, "learning_rate": 3.0849855882609836e-05, "loss": 0.8517, "step": 43850 }, { "epoch": 3.83, "learning_rate": 3.084548868896847e-05, "loss": 0.9324, "step": 43860 }, { "epoch": 3.83, "learning_rate": 3.08411214953271e-05, "loss": 0.9605, "step": 43870 }, { "epoch": 3.83, "learning_rate": 3.0836754301685736e-05, "loss": 0.8232, "step": 43880 }, { "epoch": 3.83, "learning_rate": 3.0832387108044376e-05, "loss": 0.9938, "step": 43890 }, { "epoch": 3.83, "learning_rate": 3.0828019914403e-05, "loss": 0.8737, "step": 43900 }, { "epoch": 3.84, "learning_rate": 3.082365272076164e-05, "loss": 0.878, "step": 43910 }, { "epoch": 3.84, "learning_rate": 3.0819285527120276e-05, "loss": 0.8445, "step": 43920 }, { "epoch": 3.84, "learning_rate": 3.081491833347891e-05, "loss": 0.8087, "step": 43930 }, { "epoch": 3.84, "learning_rate": 3.081055113983754e-05, "loss": 0.9099, "step": 43940 }, { "epoch": 3.84, "learning_rate": 3.0806183946196175e-05, "loss": 0.7952, "step": 43950 }, { "epoch": 3.84, "learning_rate": 3.080181675255481e-05, "loss": 0.8154, "step": 43960 }, { "epoch": 3.84, "learning_rate": 3.079744955891344e-05, "loss": 0.7892, "step": 43970 }, { "epoch": 3.84, "learning_rate": 3.079308236527208e-05, "loss": 0.8371, "step": 43980 }, { "epoch": 3.84, "learning_rate": 3.078871517163071e-05, "loss": 0.7869, "step": 43990 }, { "epoch": 3.84, "learning_rate": 3.078434797798935e-05, "loss": 0.8377, "step": 44000 }, { "epoch": 3.84, "learning_rate": 3.077998078434798e-05, "loss": 0.9768, "step": 44010 }, { "epoch": 3.84, "learning_rate": 3.0775613590706614e-05, "loss": 0.8085, "step": 44020 }, { "epoch": 3.85, "learning_rate": 3.077124639706525e-05, "loss": 1.0397, "step": 44030 }, { "epoch": 3.85, "learning_rate": 3.076687920342388e-05, "loss": 0.7774, "step": 44040 }, { "epoch": 3.85, "learning_rate": 3.0762512009782514e-05, "loss": 0.8788, "step": 44050 }, { "epoch": 3.85, "learning_rate": 3.075814481614115e-05, "loss": 1.0319, "step": 44060 }, { "epoch": 3.85, "learning_rate": 3.075377762249979e-05, "loss": 0.8404, "step": 44070 }, { "epoch": 3.85, "learning_rate": 3.0749410428858414e-05, "loss": 0.9451, "step": 44080 }, { "epoch": 3.85, "learning_rate": 3.0745043235217054e-05, "loss": 0.8649, "step": 44090 }, { "epoch": 3.85, "learning_rate": 3.074067604157568e-05, "loss": 0.9432, "step": 44100 }, { "epoch": 3.85, "learning_rate": 3.073630884793432e-05, "loss": 0.824, "step": 44110 }, { "epoch": 3.85, "learning_rate": 3.073194165429295e-05, "loss": 0.8924, "step": 44120 }, { "epoch": 3.85, "learning_rate": 3.0727574460651586e-05, "loss": 0.8765, "step": 44130 }, { "epoch": 3.86, "learning_rate": 3.0723207267010226e-05, "loss": 0.8064, "step": 44140 }, { "epoch": 3.86, "learning_rate": 3.071884007336885e-05, "loss": 0.7858, "step": 44150 }, { "epoch": 3.86, "learning_rate": 3.071447287972749e-05, "loss": 0.8751, "step": 44160 }, { "epoch": 3.86, "learning_rate": 3.071010568608612e-05, "loss": 1.022, "step": 44170 }, { "epoch": 3.86, "learning_rate": 3.070573849244476e-05, "loss": 0.993, "step": 44180 }, { "epoch": 3.86, "learning_rate": 3.0701371298803386e-05, "loss": 0.7048, "step": 44190 }, { "epoch": 3.86, "learning_rate": 3.0697004105162026e-05, "loss": 0.7302, "step": 44200 }, { "epoch": 3.86, "learning_rate": 3.069263691152066e-05, "loss": 0.8363, "step": 44210 }, { "epoch": 3.86, "learning_rate": 3.068826971787929e-05, "loss": 0.795, "step": 44220 }, { "epoch": 3.86, "learning_rate": 3.0683902524237925e-05, "loss": 0.9919, "step": 44230 }, { "epoch": 3.86, "learning_rate": 3.067953533059656e-05, "loss": 0.9153, "step": 44240 }, { "epoch": 3.86, "learning_rate": 3.06751681369552e-05, "loss": 1.0438, "step": 44250 }, { "epoch": 3.87, "learning_rate": 3.0670800943313825e-05, "loss": 0.9593, "step": 44260 }, { "epoch": 3.87, "learning_rate": 3.0666433749672465e-05, "loss": 0.9028, "step": 44270 }, { "epoch": 3.87, "learning_rate": 3.066206655603109e-05, "loss": 0.9048, "step": 44280 }, { "epoch": 3.87, "learning_rate": 3.065769936238973e-05, "loss": 0.8219, "step": 44290 }, { "epoch": 3.87, "learning_rate": 3.065333216874836e-05, "loss": 0.9463, "step": 44300 }, { "epoch": 3.87, "learning_rate": 3.0648964975107e-05, "loss": 0.9641, "step": 44310 }, { "epoch": 3.87, "learning_rate": 3.064459778146563e-05, "loss": 0.8917, "step": 44320 }, { "epoch": 3.87, "learning_rate": 3.0640230587824264e-05, "loss": 0.8473, "step": 44330 }, { "epoch": 3.87, "learning_rate": 3.0635863394182904e-05, "loss": 0.9426, "step": 44340 }, { "epoch": 3.87, "learning_rate": 3.063149620054153e-05, "loss": 0.826, "step": 44350 }, { "epoch": 3.87, "learning_rate": 3.062712900690017e-05, "loss": 0.8288, "step": 44360 }, { "epoch": 3.88, "learning_rate": 3.06227618132588e-05, "loss": 0.8898, "step": 44370 }, { "epoch": 3.88, "learning_rate": 3.061839461961744e-05, "loss": 0.8747, "step": 44380 }, { "epoch": 3.88, "learning_rate": 3.061402742597607e-05, "loss": 0.8628, "step": 44390 }, { "epoch": 3.88, "learning_rate": 3.06096602323347e-05, "loss": 0.96, "step": 44400 }, { "epoch": 3.88, "learning_rate": 3.0605293038693337e-05, "loss": 0.7989, "step": 44410 }, { "epoch": 3.88, "learning_rate": 3.060092584505197e-05, "loss": 0.829, "step": 44420 }, { "epoch": 3.88, "learning_rate": 3.05965586514106e-05, "loss": 0.8608, "step": 44430 }, { "epoch": 3.88, "learning_rate": 3.0592191457769236e-05, "loss": 0.7868, "step": 44440 }, { "epoch": 3.88, "learning_rate": 3.0587824264127876e-05, "loss": 0.8875, "step": 44450 }, { "epoch": 3.88, "learning_rate": 3.05834570704865e-05, "loss": 0.9022, "step": 44460 }, { "epoch": 3.88, "learning_rate": 3.057908987684514e-05, "loss": 1.0184, "step": 44470 }, { "epoch": 3.89, "learning_rate": 3.0574722683203776e-05, "loss": 0.9331, "step": 44480 }, { "epoch": 3.89, "learning_rate": 3.057035548956241e-05, "loss": 0.7171, "step": 44490 }, { "epoch": 3.89, "learning_rate": 3.056598829592104e-05, "loss": 0.9207, "step": 44500 }, { "epoch": 3.89, "learning_rate": 3.0561621102279675e-05, "loss": 0.6787, "step": 44510 }, { "epoch": 3.89, "learning_rate": 3.055725390863831e-05, "loss": 0.796, "step": 44520 }, { "epoch": 3.89, "learning_rate": 3.055288671499694e-05, "loss": 0.8625, "step": 44530 }, { "epoch": 3.89, "learning_rate": 3.054851952135558e-05, "loss": 0.9321, "step": 44540 }, { "epoch": 3.89, "learning_rate": 3.0544152327714215e-05, "loss": 0.8511, "step": 44550 }, { "epoch": 3.89, "learning_rate": 3.053978513407285e-05, "loss": 0.8972, "step": 44560 }, { "epoch": 3.89, "learning_rate": 3.053541794043148e-05, "loss": 0.9302, "step": 44570 }, { "epoch": 3.89, "learning_rate": 3.0531050746790115e-05, "loss": 1.0275, "step": 44580 }, { "epoch": 3.89, "learning_rate": 3.052668355314875e-05, "loss": 0.9232, "step": 44590 }, { "epoch": 3.9, "learning_rate": 3.052231635950738e-05, "loss": 0.9392, "step": 44600 }, { "epoch": 3.9, "learning_rate": 3.0517949165866014e-05, "loss": 0.9044, "step": 44610 }, { "epoch": 3.9, "learning_rate": 3.0513581972224647e-05, "loss": 0.9134, "step": 44620 }, { "epoch": 3.9, "learning_rate": 3.0509214778583284e-05, "loss": 0.8338, "step": 44630 }, { "epoch": 3.9, "learning_rate": 3.050484758494192e-05, "loss": 0.9729, "step": 44640 }, { "epoch": 3.9, "learning_rate": 3.050048039130055e-05, "loss": 0.9443, "step": 44650 }, { "epoch": 3.9, "learning_rate": 3.0496113197659187e-05, "loss": 0.7804, "step": 44660 }, { "epoch": 3.9, "learning_rate": 3.0491746004017817e-05, "loss": 0.864, "step": 44670 }, { "epoch": 3.9, "learning_rate": 3.0487378810376453e-05, "loss": 0.8553, "step": 44680 }, { "epoch": 3.9, "learning_rate": 3.0483011616735087e-05, "loss": 0.8481, "step": 44690 }, { "epoch": 3.9, "learning_rate": 3.0478644423093723e-05, "loss": 0.852, "step": 44700 }, { "epoch": 3.91, "learning_rate": 3.0474277229452353e-05, "loss": 0.7386, "step": 44710 }, { "epoch": 3.91, "learning_rate": 3.046991003581099e-05, "loss": 1.0353, "step": 44720 }, { "epoch": 3.91, "learning_rate": 3.0465542842169626e-05, "loss": 0.8335, "step": 44730 }, { "epoch": 3.91, "learning_rate": 3.0461175648528256e-05, "loss": 0.8874, "step": 44740 }, { "epoch": 3.91, "learning_rate": 3.0456808454886893e-05, "loss": 0.8394, "step": 44750 }, { "epoch": 3.91, "learning_rate": 3.0452441261245522e-05, "loss": 0.9353, "step": 44760 }, { "epoch": 3.91, "learning_rate": 3.044807406760416e-05, "loss": 0.9343, "step": 44770 }, { "epoch": 3.91, "learning_rate": 3.0443706873962792e-05, "loss": 0.8377, "step": 44780 }, { "epoch": 3.91, "learning_rate": 3.043933968032143e-05, "loss": 0.9362, "step": 44790 }, { "epoch": 3.91, "learning_rate": 3.0434972486680062e-05, "loss": 1.0287, "step": 44800 }, { "epoch": 3.91, "learning_rate": 3.0430605293038695e-05, "loss": 0.9655, "step": 44810 }, { "epoch": 3.91, "learning_rate": 3.0426238099397332e-05, "loss": 0.8478, "step": 44820 }, { "epoch": 3.92, "learning_rate": 3.042187090575596e-05, "loss": 0.8815, "step": 44830 }, { "epoch": 3.92, "learning_rate": 3.0417503712114598e-05, "loss": 0.9542, "step": 44840 }, { "epoch": 3.92, "learning_rate": 3.0413136518473228e-05, "loss": 0.8578, "step": 44850 }, { "epoch": 3.92, "learning_rate": 3.0408769324831865e-05, "loss": 0.9245, "step": 44860 }, { "epoch": 3.92, "learning_rate": 3.0404402131190494e-05, "loss": 0.9584, "step": 44870 }, { "epoch": 3.92, "learning_rate": 3.040003493754913e-05, "loss": 0.8226, "step": 44880 }, { "epoch": 3.92, "learning_rate": 3.0395667743907768e-05, "loss": 0.8253, "step": 44890 }, { "epoch": 3.92, "learning_rate": 3.03913005502664e-05, "loss": 0.918, "step": 44900 }, { "epoch": 3.92, "learning_rate": 3.0386933356625037e-05, "loss": 0.9091, "step": 44910 }, { "epoch": 3.92, "learning_rate": 3.0382566162983667e-05, "loss": 0.9113, "step": 44920 }, { "epoch": 3.92, "learning_rate": 3.0378198969342304e-05, "loss": 0.793, "step": 44930 }, { "epoch": 3.93, "learning_rate": 3.0373831775700934e-05, "loss": 0.9182, "step": 44940 }, { "epoch": 3.93, "learning_rate": 3.036946458205957e-05, "loss": 0.7742, "step": 44950 }, { "epoch": 3.93, "learning_rate": 3.0365097388418207e-05, "loss": 0.8582, "step": 44960 }, { "epoch": 3.93, "learning_rate": 3.0360730194776837e-05, "loss": 0.8507, "step": 44970 }, { "epoch": 3.93, "learning_rate": 3.0356363001135473e-05, "loss": 0.8557, "step": 44980 }, { "epoch": 3.93, "learning_rate": 3.0351995807494106e-05, "loss": 0.8028, "step": 44990 }, { "epoch": 3.93, "learning_rate": 3.034762861385274e-05, "loss": 0.877, "step": 45000 }, { "epoch": 3.93, "eval_accuracy": 0.5826864498378697, "eval_loss": 0.8853797316551208, "eval_runtime": 84.0773, "eval_samples_per_second": 121.043, "eval_steps_per_second": 15.141, "step": 45000 }, { "epoch": 3.93, "learning_rate": 3.0343261420211373e-05, "loss": 0.9642, "step": 45010 }, { "epoch": 3.93, "learning_rate": 3.033889422657001e-05, "loss": 0.8797, "step": 45020 }, { "epoch": 3.93, "learning_rate": 3.033452703292864e-05, "loss": 0.8844, "step": 45030 }, { "epoch": 3.93, "learning_rate": 3.0330159839287276e-05, "loss": 0.9339, "step": 45040 }, { "epoch": 3.93, "learning_rate": 3.0325792645645912e-05, "loss": 0.8161, "step": 45050 }, { "epoch": 3.94, "learning_rate": 3.0321425452004542e-05, "loss": 0.9955, "step": 45060 }, { "epoch": 3.94, "learning_rate": 3.031705825836318e-05, "loss": 0.8497, "step": 45070 }, { "epoch": 3.94, "learning_rate": 3.031269106472181e-05, "loss": 0.9236, "step": 45080 }, { "epoch": 3.94, "learning_rate": 3.0308323871080445e-05, "loss": 0.9742, "step": 45090 }, { "epoch": 3.94, "learning_rate": 3.030395667743908e-05, "loss": 0.9517, "step": 45100 }, { "epoch": 3.94, "learning_rate": 3.0299589483797715e-05, "loss": 0.9418, "step": 45110 }, { "epoch": 3.94, "learning_rate": 3.0295222290156345e-05, "loss": 0.8854, "step": 45120 }, { "epoch": 3.94, "learning_rate": 3.029085509651498e-05, "loss": 0.7869, "step": 45130 }, { "epoch": 3.94, "learning_rate": 3.0286487902873618e-05, "loss": 0.8301, "step": 45140 }, { "epoch": 3.94, "learning_rate": 3.0282120709232248e-05, "loss": 0.8901, "step": 45150 }, { "epoch": 3.94, "learning_rate": 3.0277753515590884e-05, "loss": 0.8435, "step": 45160 }, { "epoch": 3.95, "learning_rate": 3.0273386321949514e-05, "loss": 0.9228, "step": 45170 }, { "epoch": 3.95, "learning_rate": 3.026901912830815e-05, "loss": 0.9797, "step": 45180 }, { "epoch": 3.95, "learning_rate": 3.0264651934666784e-05, "loss": 0.7431, "step": 45190 }, { "epoch": 3.95, "learning_rate": 3.0260284741025417e-05, "loss": 0.958, "step": 45200 }, { "epoch": 3.95, "learning_rate": 3.0255917547384054e-05, "loss": 0.9199, "step": 45210 }, { "epoch": 3.95, "learning_rate": 3.0251550353742687e-05, "loss": 0.8757, "step": 45220 }, { "epoch": 3.95, "learning_rate": 3.0247183160101324e-05, "loss": 0.9024, "step": 45230 }, { "epoch": 3.95, "learning_rate": 3.0242815966459953e-05, "loss": 0.703, "step": 45240 }, { "epoch": 3.95, "learning_rate": 3.023844877281859e-05, "loss": 0.8818, "step": 45250 }, { "epoch": 3.95, "learning_rate": 3.023408157917722e-05, "loss": 0.7428, "step": 45260 }, { "epoch": 3.95, "learning_rate": 3.0229714385535856e-05, "loss": 0.8732, "step": 45270 }, { "epoch": 3.95, "learning_rate": 3.0225347191894486e-05, "loss": 0.8771, "step": 45280 }, { "epoch": 3.96, "learning_rate": 3.0220979998253123e-05, "loss": 0.9251, "step": 45290 }, { "epoch": 3.96, "learning_rate": 3.021661280461176e-05, "loss": 0.9451, "step": 45300 }, { "epoch": 3.96, "learning_rate": 3.0212245610970393e-05, "loss": 0.7999, "step": 45310 }, { "epoch": 3.96, "learning_rate": 3.020787841732903e-05, "loss": 0.9231, "step": 45320 }, { "epoch": 3.96, "learning_rate": 3.020351122368766e-05, "loss": 0.9516, "step": 45330 }, { "epoch": 3.96, "learning_rate": 3.0199144030046296e-05, "loss": 0.9275, "step": 45340 }, { "epoch": 3.96, "learning_rate": 3.0194776836404925e-05, "loss": 0.8811, "step": 45350 }, { "epoch": 3.96, "learning_rate": 3.0190409642763562e-05, "loss": 0.8381, "step": 45360 }, { "epoch": 3.96, "learning_rate": 3.01860424491222e-05, "loss": 0.8068, "step": 45370 }, { "epoch": 3.96, "learning_rate": 3.018167525548083e-05, "loss": 0.7744, "step": 45380 }, { "epoch": 3.96, "learning_rate": 3.0177308061839465e-05, "loss": 0.8411, "step": 45390 }, { "epoch": 3.97, "learning_rate": 3.0172940868198095e-05, "loss": 0.9746, "step": 45400 }, { "epoch": 3.97, "learning_rate": 3.016857367455673e-05, "loss": 0.7749, "step": 45410 }, { "epoch": 3.97, "learning_rate": 3.0164206480915365e-05, "loss": 0.8085, "step": 45420 }, { "epoch": 3.97, "learning_rate": 3.0159839287274e-05, "loss": 0.8292, "step": 45430 }, { "epoch": 3.97, "learning_rate": 3.015547209363263e-05, "loss": 0.9, "step": 45440 }, { "epoch": 3.97, "learning_rate": 3.0151104899991268e-05, "loss": 0.9002, "step": 45450 }, { "epoch": 3.97, "learning_rate": 3.0146737706349904e-05, "loss": 1.0225, "step": 45460 }, { "epoch": 3.97, "learning_rate": 3.0142370512708534e-05, "loss": 1.0336, "step": 45470 }, { "epoch": 3.97, "learning_rate": 3.013800331906717e-05, "loss": 0.7971, "step": 45480 }, { "epoch": 3.97, "learning_rate": 3.01336361254258e-05, "loss": 0.9997, "step": 45490 }, { "epoch": 3.97, "learning_rate": 3.0129268931784437e-05, "loss": 0.8555, "step": 45500 }, { "epoch": 3.98, "learning_rate": 3.012490173814307e-05, "loss": 0.9566, "step": 45510 }, { "epoch": 3.98, "learning_rate": 3.0120534544501707e-05, "loss": 0.8611, "step": 45520 }, { "epoch": 3.98, "learning_rate": 3.0116167350860337e-05, "loss": 0.922, "step": 45530 }, { "epoch": 3.98, "learning_rate": 3.0111800157218973e-05, "loss": 0.7972, "step": 45540 }, { "epoch": 3.98, "learning_rate": 3.010743296357761e-05, "loss": 0.7603, "step": 45550 }, { "epoch": 3.98, "learning_rate": 3.010306576993624e-05, "loss": 0.8694, "step": 45560 }, { "epoch": 3.98, "learning_rate": 3.0098698576294876e-05, "loss": 0.828, "step": 45570 }, { "epoch": 3.98, "learning_rate": 3.0094331382653506e-05, "loss": 0.8802, "step": 45580 }, { "epoch": 3.98, "learning_rate": 3.0089964189012143e-05, "loss": 0.9077, "step": 45590 }, { "epoch": 3.98, "learning_rate": 3.0085596995370772e-05, "loss": 0.7925, "step": 45600 }, { "epoch": 3.98, "learning_rate": 3.008122980172941e-05, "loss": 0.9141, "step": 45610 }, { "epoch": 3.98, "learning_rate": 3.0076862608088046e-05, "loss": 0.8307, "step": 45620 }, { "epoch": 3.99, "learning_rate": 3.007249541444668e-05, "loss": 0.8519, "step": 45630 }, { "epoch": 3.99, "learning_rate": 3.0068128220805315e-05, "loss": 0.9298, "step": 45640 }, { "epoch": 3.99, "learning_rate": 3.0063761027163945e-05, "loss": 0.856, "step": 45650 }, { "epoch": 3.99, "learning_rate": 3.0059393833522582e-05, "loss": 0.7765, "step": 45660 }, { "epoch": 3.99, "learning_rate": 3.005502663988121e-05, "loss": 0.9012, "step": 45670 }, { "epoch": 3.99, "learning_rate": 3.0050659446239848e-05, "loss": 0.7748, "step": 45680 }, { "epoch": 3.99, "learning_rate": 3.0046292252598478e-05, "loss": 0.8492, "step": 45690 }, { "epoch": 3.99, "learning_rate": 3.0041925058957115e-05, "loss": 1.0521, "step": 45700 }, { "epoch": 3.99, "learning_rate": 3.003755786531575e-05, "loss": 0.8673, "step": 45710 }, { "epoch": 3.99, "learning_rate": 3.0033190671674384e-05, "loss": 0.9512, "step": 45720 }, { "epoch": 3.99, "learning_rate": 3.0028823478033018e-05, "loss": 0.8225, "step": 45730 }, { "epoch": 4.0, "learning_rate": 3.002445628439165e-05, "loss": 0.73, "step": 45740 }, { "epoch": 4.0, "learning_rate": 3.0020089090750287e-05, "loss": 1.0381, "step": 45750 }, { "epoch": 4.0, "learning_rate": 3.0015721897108917e-05, "loss": 0.7668, "step": 45760 }, { "epoch": 4.0, "learning_rate": 3.0011354703467554e-05, "loss": 0.9559, "step": 45770 }, { "epoch": 4.0, "learning_rate": 3.000698750982619e-05, "loss": 0.8025, "step": 45780 }, { "epoch": 4.0, "learning_rate": 3.000262031618482e-05, "loss": 0.9393, "step": 45790 }, { "epoch": 4.0, "learning_rate": 2.9998253122543457e-05, "loss": 0.9736, "step": 45800 }, { "epoch": 4.0, "learning_rate": 2.9993885928902087e-05, "loss": 0.8795, "step": 45810 }, { "epoch": 4.0, "learning_rate": 2.9989518735260723e-05, "loss": 0.8743, "step": 45820 }, { "epoch": 4.0, "learning_rate": 2.9985151541619356e-05, "loss": 0.8894, "step": 45830 }, { "epoch": 4.0, "learning_rate": 2.9980784347977993e-05, "loss": 0.8282, "step": 45840 }, { "epoch": 4.0, "learning_rate": 2.9976417154336623e-05, "loss": 0.9567, "step": 45850 }, { "epoch": 4.01, "learning_rate": 2.997204996069526e-05, "loss": 0.8828, "step": 45860 }, { "epoch": 4.01, "learning_rate": 2.9967682767053896e-05, "loss": 0.876, "step": 45870 }, { "epoch": 4.01, "learning_rate": 2.9963315573412526e-05, "loss": 0.9182, "step": 45880 }, { "epoch": 4.01, "learning_rate": 2.9958948379771162e-05, "loss": 0.9173, "step": 45890 }, { "epoch": 4.01, "learning_rate": 2.9954581186129792e-05, "loss": 0.7591, "step": 45900 }, { "epoch": 4.01, "learning_rate": 2.995021399248843e-05, "loss": 0.8418, "step": 45910 }, { "epoch": 4.01, "learning_rate": 2.9945846798847062e-05, "loss": 1.0019, "step": 45920 }, { "epoch": 4.01, "learning_rate": 2.9941479605205695e-05, "loss": 0.9312, "step": 45930 }, { "epoch": 4.01, "learning_rate": 2.993711241156433e-05, "loss": 0.8966, "step": 45940 }, { "epoch": 4.01, "learning_rate": 2.9932745217922965e-05, "loss": 0.8913, "step": 45950 }, { "epoch": 4.01, "learning_rate": 2.99283780242816e-05, "loss": 0.9801, "step": 45960 }, { "epoch": 4.02, "learning_rate": 2.992401083064023e-05, "loss": 0.863, "step": 45970 }, { "epoch": 4.02, "learning_rate": 2.9919643636998868e-05, "loss": 0.7963, "step": 45980 }, { "epoch": 4.02, "learning_rate": 2.9915276443357498e-05, "loss": 0.99, "step": 45990 }, { "epoch": 4.02, "learning_rate": 2.9910909249716134e-05, "loss": 0.8926, "step": 46000 }, { "epoch": 4.02, "learning_rate": 2.9906542056074764e-05, "loss": 0.8966, "step": 46010 }, { "epoch": 4.02, "learning_rate": 2.99021748624334e-05, "loss": 0.8947, "step": 46020 }, { "epoch": 4.02, "learning_rate": 2.9897807668792037e-05, "loss": 0.8433, "step": 46030 }, { "epoch": 4.02, "learning_rate": 2.989344047515067e-05, "loss": 0.8201, "step": 46040 }, { "epoch": 4.02, "learning_rate": 2.9889073281509307e-05, "loss": 0.9368, "step": 46050 }, { "epoch": 4.02, "learning_rate": 2.9884706087867937e-05, "loss": 0.8678, "step": 46060 }, { "epoch": 4.02, "learning_rate": 2.9880338894226574e-05, "loss": 0.87, "step": 46070 }, { "epoch": 4.02, "learning_rate": 2.9875971700585203e-05, "loss": 0.7811, "step": 46080 }, { "epoch": 4.03, "learning_rate": 2.987160450694384e-05, "loss": 0.9418, "step": 46090 }, { "epoch": 4.03, "learning_rate": 2.986723731330247e-05, "loss": 0.8981, "step": 46100 }, { "epoch": 4.03, "learning_rate": 2.9862870119661106e-05, "loss": 0.9443, "step": 46110 }, { "epoch": 4.03, "learning_rate": 2.9858502926019743e-05, "loss": 0.8358, "step": 46120 }, { "epoch": 4.03, "learning_rate": 2.9854135732378373e-05, "loss": 0.8623, "step": 46130 }, { "epoch": 4.03, "learning_rate": 2.984976853873701e-05, "loss": 0.894, "step": 46140 }, { "epoch": 4.03, "learning_rate": 2.9845401345095643e-05, "loss": 0.9561, "step": 46150 }, { "epoch": 4.03, "learning_rate": 2.984103415145428e-05, "loss": 0.8907, "step": 46160 }, { "epoch": 4.03, "learning_rate": 2.983666695781291e-05, "loss": 0.8886, "step": 46170 }, { "epoch": 4.03, "learning_rate": 2.9832299764171546e-05, "loss": 0.9706, "step": 46180 }, { "epoch": 4.03, "learning_rate": 2.9827932570530175e-05, "loss": 0.8676, "step": 46190 }, { "epoch": 4.04, "learning_rate": 2.9823565376888812e-05, "loss": 0.8603, "step": 46200 }, { "epoch": 4.04, "learning_rate": 2.981919818324745e-05, "loss": 0.9466, "step": 46210 }, { "epoch": 4.04, "learning_rate": 2.981483098960608e-05, "loss": 0.8214, "step": 46220 }, { "epoch": 4.04, "learning_rate": 2.9810463795964715e-05, "loss": 0.942, "step": 46230 }, { "epoch": 4.04, "learning_rate": 2.9806096602323348e-05, "loss": 0.9323, "step": 46240 }, { "epoch": 4.04, "learning_rate": 2.9801729408681985e-05, "loss": 0.8226, "step": 46250 }, { "epoch": 4.04, "learning_rate": 2.9797362215040615e-05, "loss": 1.0107, "step": 46260 }, { "epoch": 4.04, "learning_rate": 2.979299502139925e-05, "loss": 0.75, "step": 46270 }, { "epoch": 4.04, "learning_rate": 2.9788627827757888e-05, "loss": 0.9405, "step": 46280 }, { "epoch": 4.04, "learning_rate": 2.9784260634116518e-05, "loss": 1.0055, "step": 46290 }, { "epoch": 4.04, "learning_rate": 2.9779893440475154e-05, "loss": 0.9042, "step": 46300 }, { "epoch": 4.04, "learning_rate": 2.9775526246833784e-05, "loss": 0.8492, "step": 46310 }, { "epoch": 4.05, "learning_rate": 2.977115905319242e-05, "loss": 0.8208, "step": 46320 }, { "epoch": 4.05, "learning_rate": 2.976679185955105e-05, "loss": 0.8849, "step": 46330 }, { "epoch": 4.05, "learning_rate": 2.9762424665909687e-05, "loss": 0.8596, "step": 46340 }, { "epoch": 4.05, "learning_rate": 2.975805747226832e-05, "loss": 0.9543, "step": 46350 }, { "epoch": 4.05, "learning_rate": 2.9753690278626957e-05, "loss": 0.8814, "step": 46360 }, { "epoch": 4.05, "learning_rate": 2.9749323084985593e-05, "loss": 0.8814, "step": 46370 }, { "epoch": 4.05, "learning_rate": 2.9744955891344223e-05, "loss": 0.9316, "step": 46380 }, { "epoch": 4.05, "learning_rate": 2.974058869770286e-05, "loss": 0.97, "step": 46390 }, { "epoch": 4.05, "learning_rate": 2.973622150406149e-05, "loss": 0.8944, "step": 46400 }, { "epoch": 4.05, "learning_rate": 2.9731854310420126e-05, "loss": 0.846, "step": 46410 }, { "epoch": 4.05, "learning_rate": 2.9727487116778756e-05, "loss": 0.8988, "step": 46420 }, { "epoch": 4.06, "learning_rate": 2.9723119923137393e-05, "loss": 0.9072, "step": 46430 }, { "epoch": 4.06, "learning_rate": 2.971875272949603e-05, "loss": 0.8363, "step": 46440 }, { "epoch": 4.06, "learning_rate": 2.9714385535854662e-05, "loss": 0.8746, "step": 46450 }, { "epoch": 4.06, "learning_rate": 2.9710018342213296e-05, "loss": 0.9768, "step": 46460 }, { "epoch": 4.06, "learning_rate": 2.970565114857193e-05, "loss": 0.8954, "step": 46470 }, { "epoch": 4.06, "learning_rate": 2.9701283954930565e-05, "loss": 0.7925, "step": 46480 }, { "epoch": 4.06, "learning_rate": 2.9696916761289195e-05, "loss": 0.9509, "step": 46490 }, { "epoch": 4.06, "learning_rate": 2.9692549567647832e-05, "loss": 0.8706, "step": 46500 }, { "epoch": 4.06, "learning_rate": 2.968818237400646e-05, "loss": 0.8901, "step": 46510 }, { "epoch": 4.06, "learning_rate": 2.9683815180365098e-05, "loss": 0.8951, "step": 46520 }, { "epoch": 4.06, "learning_rate": 2.9679447986723735e-05, "loss": 0.7429, "step": 46530 }, { "epoch": 4.06, "learning_rate": 2.9675080793082365e-05, "loss": 0.8361, "step": 46540 }, { "epoch": 4.07, "learning_rate": 2.9670713599441e-05, "loss": 0.7856, "step": 46550 }, { "epoch": 4.07, "learning_rate": 2.9666346405799634e-05, "loss": 0.8591, "step": 46560 }, { "epoch": 4.07, "learning_rate": 2.966197921215827e-05, "loss": 0.8336, "step": 46570 }, { "epoch": 4.07, "learning_rate": 2.96576120185169e-05, "loss": 0.9421, "step": 46580 }, { "epoch": 4.07, "learning_rate": 2.9653244824875537e-05, "loss": 0.8635, "step": 46590 }, { "epoch": 4.07, "learning_rate": 2.9648877631234167e-05, "loss": 0.9162, "step": 46600 }, { "epoch": 4.07, "learning_rate": 2.9644510437592804e-05, "loss": 1.0216, "step": 46610 }, { "epoch": 4.07, "learning_rate": 2.964014324395144e-05, "loss": 0.8152, "step": 46620 }, { "epoch": 4.07, "learning_rate": 2.963577605031007e-05, "loss": 0.9732, "step": 46630 }, { "epoch": 4.07, "learning_rate": 2.9631408856668707e-05, "loss": 0.8709, "step": 46640 }, { "epoch": 4.07, "learning_rate": 2.962704166302734e-05, "loss": 0.8436, "step": 46650 }, { "epoch": 4.08, "learning_rate": 2.9622674469385973e-05, "loss": 0.8937, "step": 46660 }, { "epoch": 4.08, "learning_rate": 2.9618307275744606e-05, "loss": 0.7893, "step": 46670 }, { "epoch": 4.08, "learning_rate": 2.9613940082103243e-05, "loss": 1.0346, "step": 46680 }, { "epoch": 4.08, "learning_rate": 2.960957288846188e-05, "loss": 0.9177, "step": 46690 }, { "epoch": 4.08, "learning_rate": 2.960520569482051e-05, "loss": 0.9168, "step": 46700 }, { "epoch": 4.08, "learning_rate": 2.9600838501179146e-05, "loss": 0.9156, "step": 46710 }, { "epoch": 4.08, "learning_rate": 2.9596471307537776e-05, "loss": 0.9249, "step": 46720 }, { "epoch": 4.08, "learning_rate": 2.9592104113896412e-05, "loss": 0.9468, "step": 46730 }, { "epoch": 4.08, "learning_rate": 2.9587736920255042e-05, "loss": 0.8264, "step": 46740 }, { "epoch": 4.08, "learning_rate": 2.958336972661368e-05, "loss": 0.9827, "step": 46750 }, { "epoch": 4.08, "learning_rate": 2.9579002532972312e-05, "loss": 1.0482, "step": 46760 }, { "epoch": 4.09, "learning_rate": 2.957463533933095e-05, "loss": 0.9843, "step": 46770 }, { "epoch": 4.09, "learning_rate": 2.9570268145689585e-05, "loss": 0.8671, "step": 46780 }, { "epoch": 4.09, "learning_rate": 2.9565900952048215e-05, "loss": 0.8901, "step": 46790 }, { "epoch": 4.09, "learning_rate": 2.956153375840685e-05, "loss": 0.8905, "step": 46800 }, { "epoch": 4.09, "learning_rate": 2.955716656476548e-05, "loss": 0.8822, "step": 46810 }, { "epoch": 4.09, "learning_rate": 2.9552799371124118e-05, "loss": 0.7658, "step": 46820 }, { "epoch": 4.09, "learning_rate": 2.9548432177482748e-05, "loss": 0.9574, "step": 46830 }, { "epoch": 4.09, "learning_rate": 2.9544064983841384e-05, "loss": 0.985, "step": 46840 }, { "epoch": 4.09, "learning_rate": 2.953969779020002e-05, "loss": 0.9323, "step": 46850 }, { "epoch": 4.09, "learning_rate": 2.953533059655865e-05, "loss": 0.895, "step": 46860 }, { "epoch": 4.09, "learning_rate": 2.9530963402917287e-05, "loss": 0.8801, "step": 46870 }, { "epoch": 4.09, "learning_rate": 2.952659620927592e-05, "loss": 0.822, "step": 46880 }, { "epoch": 4.1, "learning_rate": 2.9522229015634557e-05, "loss": 0.8234, "step": 46890 }, { "epoch": 4.1, "learning_rate": 2.9517861821993187e-05, "loss": 0.8566, "step": 46900 }, { "epoch": 4.1, "learning_rate": 2.9513494628351824e-05, "loss": 0.8322, "step": 46910 }, { "epoch": 4.1, "learning_rate": 2.9509127434710453e-05, "loss": 0.889, "step": 46920 }, { "epoch": 4.1, "learning_rate": 2.950476024106909e-05, "loss": 1.0066, "step": 46930 }, { "epoch": 4.1, "learning_rate": 2.9500393047427727e-05, "loss": 1.0715, "step": 46940 }, { "epoch": 4.1, "learning_rate": 2.9496025853786356e-05, "loss": 0.8517, "step": 46950 }, { "epoch": 4.1, "learning_rate": 2.9491658660144993e-05, "loss": 0.8862, "step": 46960 }, { "epoch": 4.1, "learning_rate": 2.9487291466503626e-05, "loss": 0.7595, "step": 46970 }, { "epoch": 4.1, "learning_rate": 2.9482924272862263e-05, "loss": 0.781, "step": 46980 }, { "epoch": 4.1, "learning_rate": 2.9478557079220893e-05, "loss": 0.9305, "step": 46990 }, { "epoch": 4.11, "learning_rate": 2.947418988557953e-05, "loss": 0.9662, "step": 47000 }, { "epoch": 4.11, "learning_rate": 2.946982269193816e-05, "loss": 0.9968, "step": 47010 }, { "epoch": 4.11, "learning_rate": 2.9465455498296796e-05, "loss": 0.7811, "step": 47020 }, { "epoch": 4.11, "learning_rate": 2.9461088304655432e-05, "loss": 1.0114, "step": 47030 }, { "epoch": 4.11, "learning_rate": 2.9456721111014062e-05, "loss": 0.8317, "step": 47040 }, { "epoch": 4.11, "learning_rate": 2.94523539173727e-05, "loss": 0.8806, "step": 47050 }, { "epoch": 4.11, "learning_rate": 2.944798672373133e-05, "loss": 0.9007, "step": 47060 }, { "epoch": 4.11, "learning_rate": 2.9443619530089965e-05, "loss": 0.8068, "step": 47070 }, { "epoch": 4.11, "learning_rate": 2.9439252336448598e-05, "loss": 0.7034, "step": 47080 }, { "epoch": 4.11, "learning_rate": 2.9434885142807235e-05, "loss": 0.914, "step": 47090 }, { "epoch": 4.11, "learning_rate": 2.943051794916587e-05, "loss": 0.8116, "step": 47100 }, { "epoch": 4.11, "learning_rate": 2.94261507555245e-05, "loss": 0.9348, "step": 47110 }, { "epoch": 4.12, "learning_rate": 2.9421783561883138e-05, "loss": 0.9456, "step": 47120 }, { "epoch": 4.12, "learning_rate": 2.9417416368241768e-05, "loss": 0.9117, "step": 47130 }, { "epoch": 4.12, "learning_rate": 2.9413049174600404e-05, "loss": 0.736, "step": 47140 }, { "epoch": 4.12, "learning_rate": 2.9408681980959034e-05, "loss": 0.951, "step": 47150 }, { "epoch": 4.12, "learning_rate": 2.940431478731767e-05, "loss": 0.9105, "step": 47160 }, { "epoch": 4.12, "learning_rate": 2.9399947593676304e-05, "loss": 0.9132, "step": 47170 }, { "epoch": 4.12, "learning_rate": 2.939558040003494e-05, "loss": 0.9853, "step": 47180 }, { "epoch": 4.12, "learning_rate": 2.9391213206393574e-05, "loss": 0.847, "step": 47190 }, { "epoch": 4.12, "learning_rate": 2.9386846012752207e-05, "loss": 0.936, "step": 47200 }, { "epoch": 4.12, "learning_rate": 2.9382478819110843e-05, "loss": 0.7409, "step": 47210 }, { "epoch": 4.12, "learning_rate": 2.9378111625469473e-05, "loss": 0.8016, "step": 47220 }, { "epoch": 4.13, "learning_rate": 2.937374443182811e-05, "loss": 0.8288, "step": 47230 }, { "epoch": 4.13, "learning_rate": 2.936937723818674e-05, "loss": 0.8352, "step": 47240 }, { "epoch": 4.13, "learning_rate": 2.9365010044545376e-05, "loss": 0.899, "step": 47250 }, { "epoch": 4.13, "learning_rate": 2.9360642850904013e-05, "loss": 0.8538, "step": 47260 }, { "epoch": 4.13, "learning_rate": 2.9356275657262643e-05, "loss": 0.9462, "step": 47270 }, { "epoch": 4.13, "learning_rate": 2.935190846362128e-05, "loss": 0.8749, "step": 47280 }, { "epoch": 4.13, "learning_rate": 2.9347541269979912e-05, "loss": 0.8485, "step": 47290 }, { "epoch": 4.13, "learning_rate": 2.934317407633855e-05, "loss": 0.7518, "step": 47300 }, { "epoch": 4.13, "learning_rate": 2.933880688269718e-05, "loss": 0.7837, "step": 47310 }, { "epoch": 4.13, "learning_rate": 2.9334439689055815e-05, "loss": 0.8489, "step": 47320 }, { "epoch": 4.13, "learning_rate": 2.9330072495414445e-05, "loss": 0.9278, "step": 47330 }, { "epoch": 4.13, "learning_rate": 2.9325705301773082e-05, "loss": 0.7669, "step": 47340 }, { "epoch": 4.14, "learning_rate": 2.932133810813172e-05, "loss": 0.8854, "step": 47350 }, { "epoch": 4.14, "learning_rate": 2.9316970914490348e-05, "loss": 0.9728, "step": 47360 }, { "epoch": 4.14, "learning_rate": 2.9312603720848985e-05, "loss": 0.8337, "step": 47370 }, { "epoch": 4.14, "learning_rate": 2.9308236527207618e-05, "loss": 0.8744, "step": 47380 }, { "epoch": 4.14, "learning_rate": 2.930386933356625e-05, "loss": 0.9595, "step": 47390 }, { "epoch": 4.14, "learning_rate": 2.9299502139924884e-05, "loss": 0.7903, "step": 47400 }, { "epoch": 4.14, "learning_rate": 2.929513494628352e-05, "loss": 0.8339, "step": 47410 }, { "epoch": 4.14, "learning_rate": 2.929076775264215e-05, "loss": 0.9156, "step": 47420 }, { "epoch": 4.14, "learning_rate": 2.9286400559000787e-05, "loss": 0.8118, "step": 47430 }, { "epoch": 4.14, "learning_rate": 2.9282033365359424e-05, "loss": 0.9291, "step": 47440 }, { "epoch": 4.14, "learning_rate": 2.9277666171718054e-05, "loss": 0.7708, "step": 47450 }, { "epoch": 4.15, "learning_rate": 2.927329897807669e-05, "loss": 0.844, "step": 47460 }, { "epoch": 4.15, "learning_rate": 2.926893178443532e-05, "loss": 0.8055, "step": 47470 }, { "epoch": 4.15, "learning_rate": 2.9264564590793957e-05, "loss": 0.8492, "step": 47480 }, { "epoch": 4.15, "learning_rate": 2.926019739715259e-05, "loss": 0.7858, "step": 47490 }, { "epoch": 4.15, "learning_rate": 2.9255830203511227e-05, "loss": 0.8332, "step": 47500 }, { "epoch": 4.15, "learning_rate": 2.9251463009869863e-05, "loss": 0.8276, "step": 47510 }, { "epoch": 4.15, "learning_rate": 2.9247095816228493e-05, "loss": 0.7937, "step": 47520 }, { "epoch": 4.15, "learning_rate": 2.924272862258713e-05, "loss": 1.0103, "step": 47530 }, { "epoch": 4.15, "learning_rate": 2.923836142894576e-05, "loss": 0.8972, "step": 47540 }, { "epoch": 4.15, "learning_rate": 2.9233994235304396e-05, "loss": 0.8074, "step": 47550 }, { "epoch": 4.15, "learning_rate": 2.9229627041663026e-05, "loss": 0.8554, "step": 47560 }, { "epoch": 4.15, "learning_rate": 2.9225259848021662e-05, "loss": 0.8267, "step": 47570 }, { "epoch": 4.16, "learning_rate": 2.9220892654380296e-05, "loss": 0.8382, "step": 47580 }, { "epoch": 4.16, "learning_rate": 2.921652546073893e-05, "loss": 1.0097, "step": 47590 }, { "epoch": 4.16, "learning_rate": 2.9212158267097565e-05, "loss": 0.8414, "step": 47600 }, { "epoch": 4.16, "learning_rate": 2.92077910734562e-05, "loss": 0.8303, "step": 47610 }, { "epoch": 4.16, "learning_rate": 2.9203423879814835e-05, "loss": 0.8084, "step": 47620 }, { "epoch": 4.16, "learning_rate": 2.9199056686173465e-05, "loss": 0.9145, "step": 47630 }, { "epoch": 4.16, "learning_rate": 2.91946894925321e-05, "loss": 0.9749, "step": 47640 }, { "epoch": 4.16, "learning_rate": 2.919032229889073e-05, "loss": 0.8931, "step": 47650 }, { "epoch": 4.16, "learning_rate": 2.9185955105249368e-05, "loss": 1.0061, "step": 47660 }, { "epoch": 4.16, "learning_rate": 2.9181587911608005e-05, "loss": 0.9355, "step": 47670 }, { "epoch": 4.16, "learning_rate": 2.9177220717966634e-05, "loss": 0.8528, "step": 47680 }, { "epoch": 4.17, "learning_rate": 2.917285352432527e-05, "loss": 0.8113, "step": 47690 }, { "epoch": 4.17, "learning_rate": 2.9168486330683904e-05, "loss": 0.9424, "step": 47700 }, { "epoch": 4.17, "learning_rate": 2.916411913704254e-05, "loss": 0.9089, "step": 47710 }, { "epoch": 4.17, "learning_rate": 2.915975194340117e-05, "loss": 0.8289, "step": 47720 }, { "epoch": 4.17, "learning_rate": 2.9155384749759807e-05, "loss": 0.8696, "step": 47730 }, { "epoch": 4.17, "learning_rate": 2.9151017556118437e-05, "loss": 1.0264, "step": 47740 }, { "epoch": 4.17, "learning_rate": 2.9146650362477074e-05, "loss": 0.8761, "step": 47750 }, { "epoch": 4.17, "learning_rate": 2.914228316883571e-05, "loss": 0.8621, "step": 47760 }, { "epoch": 4.17, "learning_rate": 2.913791597519434e-05, "loss": 0.8321, "step": 47770 }, { "epoch": 4.17, "learning_rate": 2.9133548781552977e-05, "loss": 0.8086, "step": 47780 }, { "epoch": 4.17, "learning_rate": 2.9129181587911606e-05, "loss": 0.8128, "step": 47790 }, { "epoch": 4.18, "learning_rate": 2.9124814394270243e-05, "loss": 0.9915, "step": 47800 }, { "epoch": 4.18, "learning_rate": 2.9120447200628876e-05, "loss": 0.936, "step": 47810 }, { "epoch": 4.18, "learning_rate": 2.9116080006987513e-05, "loss": 0.9603, "step": 47820 }, { "epoch": 4.18, "learning_rate": 2.9111712813346143e-05, "loss": 0.9005, "step": 47830 }, { "epoch": 4.18, "learning_rate": 2.910734561970478e-05, "loss": 0.9068, "step": 47840 }, { "epoch": 4.18, "learning_rate": 2.9102978426063416e-05, "loss": 0.9367, "step": 47850 }, { "epoch": 4.18, "learning_rate": 2.9098611232422046e-05, "loss": 0.8917, "step": 47860 }, { "epoch": 4.18, "learning_rate": 2.9094244038780682e-05, "loss": 0.9423, "step": 47870 }, { "epoch": 4.18, "learning_rate": 2.9089876845139312e-05, "loss": 0.874, "step": 47880 }, { "epoch": 4.18, "learning_rate": 2.908550965149795e-05, "loss": 0.8871, "step": 47890 }, { "epoch": 4.18, "learning_rate": 2.9081142457856582e-05, "loss": 0.8915, "step": 47900 }, { "epoch": 4.18, "learning_rate": 2.907677526421522e-05, "loss": 0.9965, "step": 47910 }, { "epoch": 4.19, "learning_rate": 2.907240807057385e-05, "loss": 0.7923, "step": 47920 }, { "epoch": 4.19, "learning_rate": 2.9068040876932485e-05, "loss": 0.8433, "step": 47930 }, { "epoch": 4.19, "learning_rate": 2.906367368329112e-05, "loss": 0.8637, "step": 47940 }, { "epoch": 4.19, "learning_rate": 2.905930648964975e-05, "loss": 0.9572, "step": 47950 }, { "epoch": 4.19, "learning_rate": 2.9054939296008388e-05, "loss": 0.9074, "step": 47960 }, { "epoch": 4.19, "learning_rate": 2.9050572102367018e-05, "loss": 0.8719, "step": 47970 }, { "epoch": 4.19, "learning_rate": 2.9046204908725654e-05, "loss": 0.8981, "step": 47980 }, { "epoch": 4.19, "learning_rate": 2.9041837715084284e-05, "loss": 0.8426, "step": 47990 }, { "epoch": 4.19, "learning_rate": 2.903747052144292e-05, "loss": 0.8483, "step": 48000 }, { "epoch": 4.19, "learning_rate": 2.9033103327801557e-05, "loss": 0.9385, "step": 48010 }, { "epoch": 4.19, "learning_rate": 2.902873613416019e-05, "loss": 0.7388, "step": 48020 }, { "epoch": 4.2, "learning_rate": 2.9024368940518827e-05, "loss": 0.9115, "step": 48030 }, { "epoch": 4.2, "learning_rate": 2.9020001746877457e-05, "loss": 0.8709, "step": 48040 }, { "epoch": 4.2, "learning_rate": 2.9015634553236093e-05, "loss": 0.8805, "step": 48050 }, { "epoch": 4.2, "learning_rate": 2.9011267359594723e-05, "loss": 0.8901, "step": 48060 }, { "epoch": 4.2, "learning_rate": 2.900690016595336e-05, "loss": 0.8732, "step": 48070 }, { "epoch": 4.2, "learning_rate": 2.900253297231199e-05, "loss": 0.7889, "step": 48080 }, { "epoch": 4.2, "learning_rate": 2.8998165778670626e-05, "loss": 0.8812, "step": 48090 }, { "epoch": 4.2, "learning_rate": 2.8993798585029263e-05, "loss": 0.965, "step": 48100 }, { "epoch": 4.2, "learning_rate": 2.8989431391387896e-05, "loss": 0.8175, "step": 48110 }, { "epoch": 4.2, "learning_rate": 2.898506419774653e-05, "loss": 0.9049, "step": 48120 }, { "epoch": 4.2, "learning_rate": 2.8980697004105162e-05, "loss": 0.7993, "step": 48130 }, { "epoch": 4.2, "learning_rate": 2.89763298104638e-05, "loss": 0.8509, "step": 48140 }, { "epoch": 4.21, "learning_rate": 2.897196261682243e-05, "loss": 0.6907, "step": 48150 }, { "epoch": 4.21, "learning_rate": 2.8967595423181065e-05, "loss": 0.8816, "step": 48160 }, { "epoch": 4.21, "learning_rate": 2.8963228229539702e-05, "loss": 0.9293, "step": 48170 }, { "epoch": 4.21, "learning_rate": 2.8958861035898332e-05, "loss": 0.7908, "step": 48180 }, { "epoch": 4.21, "learning_rate": 2.895449384225697e-05, "loss": 0.8887, "step": 48190 }, { "epoch": 4.21, "learning_rate": 2.8950126648615598e-05, "loss": 0.9547, "step": 48200 }, { "epoch": 4.21, "learning_rate": 2.8945759454974235e-05, "loss": 0.9372, "step": 48210 }, { "epoch": 4.21, "learning_rate": 2.8941392261332868e-05, "loss": 0.8436, "step": 48220 }, { "epoch": 4.21, "learning_rate": 2.8937025067691505e-05, "loss": 0.933, "step": 48230 }, { "epoch": 4.21, "learning_rate": 2.8932657874050134e-05, "loss": 0.9671, "step": 48240 }, { "epoch": 4.21, "learning_rate": 2.892829068040877e-05, "loss": 0.8237, "step": 48250 }, { "epoch": 4.22, "learning_rate": 2.8923923486767408e-05, "loss": 0.8398, "step": 48260 }, { "epoch": 4.22, "learning_rate": 2.8919556293126037e-05, "loss": 0.7766, "step": 48270 }, { "epoch": 4.22, "learning_rate": 2.8915189099484674e-05, "loss": 0.8836, "step": 48280 }, { "epoch": 4.22, "learning_rate": 2.8910821905843304e-05, "loss": 0.859, "step": 48290 }, { "epoch": 4.22, "learning_rate": 2.890645471220194e-05, "loss": 0.8867, "step": 48300 }, { "epoch": 4.22, "learning_rate": 2.8902087518560574e-05, "loss": 0.8227, "step": 48310 }, { "epoch": 4.22, "learning_rate": 2.8897720324919207e-05, "loss": 0.9903, "step": 48320 }, { "epoch": 4.22, "learning_rate": 2.8893353131277843e-05, "loss": 0.9506, "step": 48330 }, { "epoch": 4.22, "learning_rate": 2.8888985937636477e-05, "loss": 0.7098, "step": 48340 }, { "epoch": 4.22, "learning_rate": 2.8884618743995113e-05, "loss": 0.9377, "step": 48350 }, { "epoch": 4.22, "learning_rate": 2.8880251550353743e-05, "loss": 0.901, "step": 48360 }, { "epoch": 4.22, "learning_rate": 2.887588435671238e-05, "loss": 0.9249, "step": 48370 }, { "epoch": 4.23, "learning_rate": 2.887151716307101e-05, "loss": 0.907, "step": 48380 }, { "epoch": 4.23, "learning_rate": 2.8867149969429646e-05, "loss": 0.8738, "step": 48390 }, { "epoch": 4.23, "learning_rate": 2.8862782775788276e-05, "loss": 0.9187, "step": 48400 }, { "epoch": 4.23, "learning_rate": 2.8858415582146912e-05, "loss": 0.8533, "step": 48410 }, { "epoch": 4.23, "learning_rate": 2.885404838850555e-05, "loss": 0.8591, "step": 48420 }, { "epoch": 4.23, "learning_rate": 2.8849681194864182e-05, "loss": 0.8546, "step": 48430 }, { "epoch": 4.23, "learning_rate": 2.884531400122282e-05, "loss": 0.8939, "step": 48440 }, { "epoch": 4.23, "learning_rate": 2.884094680758145e-05, "loss": 0.8961, "step": 48450 }, { "epoch": 4.23, "learning_rate": 2.8836579613940085e-05, "loss": 0.8983, "step": 48460 }, { "epoch": 4.23, "learning_rate": 2.8832212420298715e-05, "loss": 0.8736, "step": 48470 }, { "epoch": 4.23, "learning_rate": 2.882784522665735e-05, "loss": 0.8697, "step": 48480 }, { "epoch": 4.24, "learning_rate": 2.882347803301598e-05, "loss": 0.9471, "step": 48490 }, { "epoch": 4.24, "learning_rate": 2.8819110839374618e-05, "loss": 0.8573, "step": 48500 }, { "epoch": 4.24, "learning_rate": 2.8814743645733255e-05, "loss": 1.0047, "step": 48510 }, { "epoch": 4.24, "learning_rate": 2.8810376452091888e-05, "loss": 0.9206, "step": 48520 }, { "epoch": 4.24, "learning_rate": 2.880600925845052e-05, "loss": 0.8542, "step": 48530 }, { "epoch": 4.24, "learning_rate": 2.8801642064809154e-05, "loss": 1.0006, "step": 48540 }, { "epoch": 4.24, "learning_rate": 2.879727487116779e-05, "loss": 0.8561, "step": 48550 }, { "epoch": 4.24, "learning_rate": 2.879290767752642e-05, "loss": 1.0208, "step": 48560 }, { "epoch": 4.24, "learning_rate": 2.8788540483885057e-05, "loss": 1.1535, "step": 48570 }, { "epoch": 4.24, "learning_rate": 2.8784173290243694e-05, "loss": 0.8649, "step": 48580 }, { "epoch": 4.24, "learning_rate": 2.8779806096602324e-05, "loss": 0.9155, "step": 48590 }, { "epoch": 4.24, "learning_rate": 2.877543890296096e-05, "loss": 0.879, "step": 48600 }, { "epoch": 4.25, "learning_rate": 2.877107170931959e-05, "loss": 0.9848, "step": 48610 }, { "epoch": 4.25, "learning_rate": 2.8766704515678227e-05, "loss": 0.8501, "step": 48620 }, { "epoch": 4.25, "learning_rate": 2.876233732203686e-05, "loss": 0.8846, "step": 48630 }, { "epoch": 4.25, "learning_rate": 2.8757970128395496e-05, "loss": 0.8981, "step": 48640 }, { "epoch": 4.25, "learning_rate": 2.8753602934754126e-05, "loss": 0.8244, "step": 48650 }, { "epoch": 4.25, "learning_rate": 2.8749235741112763e-05, "loss": 0.9315, "step": 48660 }, { "epoch": 4.25, "learning_rate": 2.87448685474714e-05, "loss": 1.0133, "step": 48670 }, { "epoch": 4.25, "learning_rate": 2.874050135383003e-05, "loss": 0.8367, "step": 48680 }, { "epoch": 4.25, "learning_rate": 2.8736134160188666e-05, "loss": 0.8068, "step": 48690 }, { "epoch": 4.25, "learning_rate": 2.8731766966547296e-05, "loss": 0.8953, "step": 48700 }, { "epoch": 4.25, "learning_rate": 2.8727399772905932e-05, "loss": 0.7962, "step": 48710 }, { "epoch": 4.26, "learning_rate": 2.8723032579264565e-05, "loss": 0.8876, "step": 48720 }, { "epoch": 4.26, "learning_rate": 2.87186653856232e-05, "loss": 0.9402, "step": 48730 }, { "epoch": 4.26, "learning_rate": 2.8714298191981835e-05, "loss": 0.8801, "step": 48740 }, { "epoch": 4.26, "learning_rate": 2.870993099834047e-05, "loss": 0.9538, "step": 48750 }, { "epoch": 4.26, "learning_rate": 2.8705563804699105e-05, "loss": 0.7389, "step": 48760 }, { "epoch": 4.26, "learning_rate": 2.8701196611057735e-05, "loss": 1.013, "step": 48770 }, { "epoch": 4.26, "learning_rate": 2.869682941741637e-05, "loss": 0.9694, "step": 48780 }, { "epoch": 4.26, "learning_rate": 2.8692462223775e-05, "loss": 0.779, "step": 48790 }, { "epoch": 4.26, "learning_rate": 2.8688095030133638e-05, "loss": 0.95, "step": 48800 }, { "epoch": 4.26, "learning_rate": 2.8683727836492268e-05, "loss": 0.9433, "step": 48810 }, { "epoch": 4.26, "learning_rate": 2.8679360642850904e-05, "loss": 0.8604, "step": 48820 }, { "epoch": 4.27, "learning_rate": 2.867499344920954e-05, "loss": 0.9229, "step": 48830 }, { "epoch": 4.27, "learning_rate": 2.8670626255568174e-05, "loss": 0.7865, "step": 48840 }, { "epoch": 4.27, "learning_rate": 2.866625906192681e-05, "loss": 0.8222, "step": 48850 }, { "epoch": 4.27, "learning_rate": 2.866189186828544e-05, "loss": 0.7373, "step": 48860 }, { "epoch": 4.27, "learning_rate": 2.8657524674644077e-05, "loss": 0.8741, "step": 48870 }, { "epoch": 4.27, "learning_rate": 2.8653157481002707e-05, "loss": 0.9254, "step": 48880 }, { "epoch": 4.27, "learning_rate": 2.8648790287361343e-05, "loss": 0.862, "step": 48890 }, { "epoch": 4.27, "learning_rate": 2.8644423093719973e-05, "loss": 0.7541, "step": 48900 }, { "epoch": 4.27, "learning_rate": 2.864005590007861e-05, "loss": 0.8518, "step": 48910 }, { "epoch": 4.27, "learning_rate": 2.8635688706437246e-05, "loss": 1.0443, "step": 48920 }, { "epoch": 4.27, "learning_rate": 2.8631321512795876e-05, "loss": 0.9768, "step": 48930 }, { "epoch": 4.27, "learning_rate": 2.8626954319154513e-05, "loss": 0.9358, "step": 48940 }, { "epoch": 4.28, "learning_rate": 2.8622587125513146e-05, "loss": 0.7868, "step": 48950 }, { "epoch": 4.28, "learning_rate": 2.8618219931871783e-05, "loss": 0.8959, "step": 48960 }, { "epoch": 4.28, "learning_rate": 2.8613852738230412e-05, "loss": 0.9027, "step": 48970 }, { "epoch": 4.28, "learning_rate": 2.860948554458905e-05, "loss": 0.9417, "step": 48980 }, { "epoch": 4.28, "learning_rate": 2.8605118350947686e-05, "loss": 0.8306, "step": 48990 }, { "epoch": 4.28, "learning_rate": 2.8600751157306315e-05, "loss": 0.7898, "step": 49000 }, { "epoch": 4.28, "learning_rate": 2.8596383963664952e-05, "loss": 0.7752, "step": 49010 }, { "epoch": 4.28, "learning_rate": 2.8592016770023582e-05, "loss": 0.7348, "step": 49020 }, { "epoch": 4.28, "learning_rate": 2.858764957638222e-05, "loss": 0.9539, "step": 49030 }, { "epoch": 4.28, "learning_rate": 2.858328238274085e-05, "loss": 0.8523, "step": 49040 }, { "epoch": 4.28, "learning_rate": 2.8578915189099488e-05, "loss": 0.8524, "step": 49050 }, { "epoch": 4.29, "learning_rate": 2.8574547995458118e-05, "loss": 1.0381, "step": 49060 }, { "epoch": 4.29, "learning_rate": 2.8570180801816755e-05, "loss": 0.8943, "step": 49070 }, { "epoch": 4.29, "learning_rate": 2.856581360817539e-05, "loss": 0.8085, "step": 49080 }, { "epoch": 4.29, "learning_rate": 2.856144641453402e-05, "loss": 0.8704, "step": 49090 }, { "epoch": 4.29, "learning_rate": 2.8557079220892658e-05, "loss": 0.7374, "step": 49100 }, { "epoch": 4.29, "learning_rate": 2.8552712027251287e-05, "loss": 0.922, "step": 49110 }, { "epoch": 4.29, "learning_rate": 2.8548344833609924e-05, "loss": 0.7355, "step": 49120 }, { "epoch": 4.29, "learning_rate": 2.8543977639968554e-05, "loss": 0.889, "step": 49130 }, { "epoch": 4.29, "learning_rate": 2.853961044632719e-05, "loss": 0.9085, "step": 49140 }, { "epoch": 4.29, "learning_rate": 2.8535243252685827e-05, "loss": 1.0381, "step": 49150 }, { "epoch": 4.29, "learning_rate": 2.853087605904446e-05, "loss": 0.8736, "step": 49160 }, { "epoch": 4.29, "learning_rate": 2.8526508865403097e-05, "loss": 0.8804, "step": 49170 }, { "epoch": 4.3, "learning_rate": 2.8522141671761727e-05, "loss": 0.7933, "step": 49180 }, { "epoch": 4.3, "learning_rate": 2.8517774478120363e-05, "loss": 0.9249, "step": 49190 }, { "epoch": 4.3, "learning_rate": 2.8513407284478993e-05, "loss": 0.8075, "step": 49200 }, { "epoch": 4.3, "learning_rate": 2.850904009083763e-05, "loss": 0.8596, "step": 49210 }, { "epoch": 4.3, "learning_rate": 2.850467289719626e-05, "loss": 0.7291, "step": 49220 }, { "epoch": 4.3, "learning_rate": 2.8500305703554896e-05, "loss": 0.8535, "step": 49230 }, { "epoch": 4.3, "learning_rate": 2.8495938509913533e-05, "loss": 0.8739, "step": 49240 }, { "epoch": 4.3, "learning_rate": 2.8491571316272166e-05, "loss": 0.8194, "step": 49250 }, { "epoch": 4.3, "learning_rate": 2.84872041226308e-05, "loss": 0.8347, "step": 49260 }, { "epoch": 4.3, "learning_rate": 2.8482836928989432e-05, "loss": 0.9477, "step": 49270 }, { "epoch": 4.3, "learning_rate": 2.847846973534807e-05, "loss": 0.9257, "step": 49280 }, { "epoch": 4.31, "learning_rate": 2.84741025417067e-05, "loss": 0.8989, "step": 49290 }, { "epoch": 4.31, "learning_rate": 2.8469735348065335e-05, "loss": 0.7497, "step": 49300 }, { "epoch": 4.31, "learning_rate": 2.8465368154423965e-05, "loss": 0.9616, "step": 49310 }, { "epoch": 4.31, "learning_rate": 2.8461000960782602e-05, "loss": 0.826, "step": 49320 }, { "epoch": 4.31, "learning_rate": 2.8456633767141238e-05, "loss": 0.8198, "step": 49330 }, { "epoch": 4.31, "learning_rate": 2.8452266573499868e-05, "loss": 0.769, "step": 49340 }, { "epoch": 4.31, "learning_rate": 2.8447899379858505e-05, "loss": 0.8449, "step": 49350 }, { "epoch": 4.31, "learning_rate": 2.8443532186217138e-05, "loss": 0.9357, "step": 49360 }, { "epoch": 4.31, "learning_rate": 2.8439164992575774e-05, "loss": 0.9361, "step": 49370 }, { "epoch": 4.31, "learning_rate": 2.8434797798934404e-05, "loss": 0.9587, "step": 49380 }, { "epoch": 4.31, "learning_rate": 2.843043060529304e-05, "loss": 0.8624, "step": 49390 }, { "epoch": 4.31, "learning_rate": 2.8426063411651677e-05, "loss": 0.808, "step": 49400 }, { "epoch": 4.32, "learning_rate": 2.8421696218010307e-05, "loss": 0.8381, "step": 49410 }, { "epoch": 4.32, "learning_rate": 2.8417329024368944e-05, "loss": 0.7786, "step": 49420 }, { "epoch": 4.32, "learning_rate": 2.8412961830727574e-05, "loss": 0.9594, "step": 49430 }, { "epoch": 4.32, "learning_rate": 2.840859463708621e-05, "loss": 0.923, "step": 49440 }, { "epoch": 4.32, "learning_rate": 2.8404227443444843e-05, "loss": 0.8015, "step": 49450 }, { "epoch": 4.32, "learning_rate": 2.8399860249803477e-05, "loss": 0.9256, "step": 49460 }, { "epoch": 4.32, "learning_rate": 2.839549305616211e-05, "loss": 0.783, "step": 49470 }, { "epoch": 4.32, "learning_rate": 2.8391125862520746e-05, "loss": 0.9908, "step": 49480 }, { "epoch": 4.32, "learning_rate": 2.8386758668879383e-05, "loss": 0.969, "step": 49490 }, { "epoch": 4.32, "learning_rate": 2.8382391475238013e-05, "loss": 0.8541, "step": 49500 }, { "epoch": 4.32, "learning_rate": 2.837802428159665e-05, "loss": 0.8594, "step": 49510 }, { "epoch": 4.33, "learning_rate": 2.837365708795528e-05, "loss": 0.8937, "step": 49520 }, { "epoch": 4.33, "learning_rate": 2.8369289894313916e-05, "loss": 1.008, "step": 49530 }, { "epoch": 4.33, "learning_rate": 2.8364922700672546e-05, "loss": 0.8565, "step": 49540 }, { "epoch": 4.33, "learning_rate": 2.8360555507031182e-05, "loss": 0.8464, "step": 49550 }, { "epoch": 4.33, "learning_rate": 2.8356188313389816e-05, "loss": 0.8221, "step": 49560 }, { "epoch": 4.33, "learning_rate": 2.8351821119748452e-05, "loss": 0.88, "step": 49570 }, { "epoch": 4.33, "learning_rate": 2.834745392610709e-05, "loss": 0.9033, "step": 49580 }, { "epoch": 4.33, "learning_rate": 2.834308673246572e-05, "loss": 0.846, "step": 49590 }, { "epoch": 4.33, "learning_rate": 2.8338719538824355e-05, "loss": 1.0387, "step": 49600 }, { "epoch": 4.33, "learning_rate": 2.8334352345182985e-05, "loss": 0.8555, "step": 49610 }, { "epoch": 4.33, "learning_rate": 2.832998515154162e-05, "loss": 0.7715, "step": 49620 }, { "epoch": 4.33, "learning_rate": 2.832561795790025e-05, "loss": 0.7288, "step": 49630 }, { "epoch": 4.34, "learning_rate": 2.8321250764258888e-05, "loss": 0.794, "step": 49640 }, { "epoch": 4.34, "learning_rate": 2.8316883570617524e-05, "loss": 0.854, "step": 49650 }, { "epoch": 4.34, "learning_rate": 2.8312516376976154e-05, "loss": 0.8998, "step": 49660 }, { "epoch": 4.34, "learning_rate": 2.830814918333479e-05, "loss": 0.8891, "step": 49670 }, { "epoch": 4.34, "learning_rate": 2.8303781989693424e-05, "loss": 0.8297, "step": 49680 }, { "epoch": 4.34, "learning_rate": 2.829941479605206e-05, "loss": 0.9197, "step": 49690 }, { "epoch": 4.34, "learning_rate": 2.829504760241069e-05, "loss": 0.9529, "step": 49700 }, { "epoch": 4.34, "learning_rate": 2.8290680408769327e-05, "loss": 0.9582, "step": 49710 }, { "epoch": 4.34, "learning_rate": 2.8286313215127957e-05, "loss": 0.9344, "step": 49720 }, { "epoch": 4.34, "learning_rate": 2.8281946021486594e-05, "loss": 0.842, "step": 49730 }, { "epoch": 4.34, "learning_rate": 2.827757882784523e-05, "loss": 0.9042, "step": 49740 }, { "epoch": 4.35, "learning_rate": 2.827321163420386e-05, "loss": 1.0033, "step": 49750 }, { "epoch": 4.35, "learning_rate": 2.8268844440562497e-05, "loss": 0.8856, "step": 49760 }, { "epoch": 4.35, "learning_rate": 2.826447724692113e-05, "loss": 0.8221, "step": 49770 }, { "epoch": 4.35, "learning_rate": 2.8260110053279766e-05, "loss": 0.8408, "step": 49780 }, { "epoch": 4.35, "learning_rate": 2.8255742859638396e-05, "loss": 0.8462, "step": 49790 }, { "epoch": 4.35, "learning_rate": 2.8251375665997033e-05, "loss": 0.8831, "step": 49800 }, { "epoch": 4.35, "learning_rate": 2.824700847235567e-05, "loss": 0.8752, "step": 49810 }, { "epoch": 4.35, "learning_rate": 2.82426412787143e-05, "loss": 0.9838, "step": 49820 }, { "epoch": 4.35, "learning_rate": 2.8238274085072936e-05, "loss": 0.8275, "step": 49830 }, { "epoch": 4.35, "learning_rate": 2.8233906891431566e-05, "loss": 1.0077, "step": 49840 }, { "epoch": 4.35, "learning_rate": 2.8229539697790202e-05, "loss": 0.7754, "step": 49850 }, { "epoch": 4.35, "learning_rate": 2.8225172504148832e-05, "loss": 0.9604, "step": 49860 }, { "epoch": 4.36, "learning_rate": 2.822080531050747e-05, "loss": 0.869, "step": 49870 }, { "epoch": 4.36, "learning_rate": 2.8216438116866102e-05, "loss": 0.7783, "step": 49880 }, { "epoch": 4.36, "learning_rate": 2.821207092322474e-05, "loss": 0.9096, "step": 49890 }, { "epoch": 4.36, "learning_rate": 2.8207703729583375e-05, "loss": 0.92, "step": 49900 }, { "epoch": 4.36, "learning_rate": 2.8203336535942005e-05, "loss": 0.8469, "step": 49910 }, { "epoch": 4.36, "learning_rate": 2.819896934230064e-05, "loss": 0.8407, "step": 49920 }, { "epoch": 4.36, "learning_rate": 2.819460214865927e-05, "loss": 0.9062, "step": 49930 }, { "epoch": 4.36, "learning_rate": 2.8190234955017908e-05, "loss": 0.9228, "step": 49940 }, { "epoch": 4.36, "learning_rate": 2.8185867761376538e-05, "loss": 0.8237, "step": 49950 }, { "epoch": 4.36, "learning_rate": 2.8181500567735174e-05, "loss": 0.8247, "step": 49960 }, { "epoch": 4.36, "learning_rate": 2.8177133374093807e-05, "loss": 0.837, "step": 49970 }, { "epoch": 4.37, "learning_rate": 2.8172766180452444e-05, "loss": 0.9636, "step": 49980 }, { "epoch": 4.37, "learning_rate": 2.8168398986811077e-05, "loss": 0.9108, "step": 49990 }, { "epoch": 4.37, "learning_rate": 2.816403179316971e-05, "loss": 0.8099, "step": 50000 }, { "epoch": 4.37, "eval_accuracy": 0.5754151518129115, "eval_loss": 0.8863910436630249, "eval_runtime": 84.1986, "eval_samples_per_second": 120.869, "eval_steps_per_second": 15.119, "step": 50000 }, { "epoch": 4.37, "learning_rate": 2.8159664599528347e-05, "loss": 0.8883, "step": 50010 }, { "epoch": 4.37, "learning_rate": 2.8155297405886977e-05, "loss": 0.8777, "step": 50020 }, { "epoch": 4.37, "learning_rate": 2.8150930212245613e-05, "loss": 0.8417, "step": 50030 }, { "epoch": 4.37, "learning_rate": 2.8146563018604243e-05, "loss": 0.9245, "step": 50040 }, { "epoch": 4.37, "learning_rate": 2.814219582496288e-05, "loss": 0.8368, "step": 50050 }, { "epoch": 4.37, "learning_rate": 2.8137828631321516e-05, "loss": 0.9576, "step": 50060 }, { "epoch": 4.37, "learning_rate": 2.8133461437680146e-05, "loss": 0.9094, "step": 50070 }, { "epoch": 4.37, "learning_rate": 2.8129094244038783e-05, "loss": 0.8602, "step": 50080 }, { "epoch": 4.38, "learning_rate": 2.8124727050397416e-05, "loss": 0.8823, "step": 50090 }, { "epoch": 4.38, "learning_rate": 2.8120359856756053e-05, "loss": 0.8287, "step": 50100 }, { "epoch": 4.38, "learning_rate": 2.8115992663114682e-05, "loss": 0.7785, "step": 50110 }, { "epoch": 4.38, "learning_rate": 2.811162546947332e-05, "loss": 0.9924, "step": 50120 }, { "epoch": 4.38, "learning_rate": 2.810725827583195e-05, "loss": 0.9532, "step": 50130 }, { "epoch": 4.38, "learning_rate": 2.8102891082190585e-05, "loss": 0.8489, "step": 50140 }, { "epoch": 4.38, "learning_rate": 2.8098523888549222e-05, "loss": 0.9488, "step": 50150 }, { "epoch": 4.38, "learning_rate": 2.8094156694907852e-05, "loss": 0.9704, "step": 50160 }, { "epoch": 4.38, "learning_rate": 2.808978950126649e-05, "loss": 0.8629, "step": 50170 }, { "epoch": 4.38, "learning_rate": 2.808542230762512e-05, "loss": 0.8692, "step": 50180 }, { "epoch": 4.38, "learning_rate": 2.8081055113983755e-05, "loss": 1.0382, "step": 50190 }, { "epoch": 4.38, "learning_rate": 2.8076687920342388e-05, "loss": 0.8193, "step": 50200 }, { "epoch": 4.39, "learning_rate": 2.8072320726701025e-05, "loss": 0.835, "step": 50210 }, { "epoch": 4.39, "learning_rate": 2.806795353305966e-05, "loss": 0.8955, "step": 50220 }, { "epoch": 4.39, "learning_rate": 2.806358633941829e-05, "loss": 0.9197, "step": 50230 }, { "epoch": 4.39, "learning_rate": 2.8059219145776928e-05, "loss": 0.919, "step": 50240 }, { "epoch": 4.39, "learning_rate": 2.8054851952135557e-05, "loss": 0.8837, "step": 50250 }, { "epoch": 4.39, "learning_rate": 2.8050484758494194e-05, "loss": 0.7989, "step": 50260 }, { "epoch": 4.39, "learning_rate": 2.8046117564852824e-05, "loss": 0.8399, "step": 50270 }, { "epoch": 4.39, "learning_rate": 2.804175037121146e-05, "loss": 0.9146, "step": 50280 }, { "epoch": 4.39, "learning_rate": 2.8037383177570094e-05, "loss": 0.834, "step": 50290 }, { "epoch": 4.39, "learning_rate": 2.803301598392873e-05, "loss": 0.9669, "step": 50300 }, { "epoch": 4.39, "learning_rate": 2.8028648790287367e-05, "loss": 0.8202, "step": 50310 }, { "epoch": 4.4, "learning_rate": 2.8024281596645997e-05, "loss": 0.9342, "step": 50320 }, { "epoch": 4.4, "learning_rate": 2.8019914403004633e-05, "loss": 1.0321, "step": 50330 }, { "epoch": 4.4, "learning_rate": 2.8015547209363263e-05, "loss": 0.8839, "step": 50340 }, { "epoch": 4.4, "learning_rate": 2.80111800157219e-05, "loss": 0.864, "step": 50350 }, { "epoch": 4.4, "learning_rate": 2.800681282208053e-05, "loss": 0.8691, "step": 50360 }, { "epoch": 4.4, "learning_rate": 2.8002445628439166e-05, "loss": 0.9019, "step": 50370 }, { "epoch": 4.4, "learning_rate": 2.79980784347978e-05, "loss": 0.8615, "step": 50380 }, { "epoch": 4.4, "learning_rate": 2.7993711241156432e-05, "loss": 0.812, "step": 50390 }, { "epoch": 4.4, "learning_rate": 2.798934404751507e-05, "loss": 0.8467, "step": 50400 }, { "epoch": 4.4, "learning_rate": 2.7984976853873702e-05, "loss": 0.8314, "step": 50410 }, { "epoch": 4.4, "learning_rate": 2.798060966023234e-05, "loss": 0.8616, "step": 50420 }, { "epoch": 4.4, "learning_rate": 2.797624246659097e-05, "loss": 0.9252, "step": 50430 }, { "epoch": 4.41, "learning_rate": 2.7971875272949605e-05, "loss": 0.8041, "step": 50440 }, { "epoch": 4.41, "learning_rate": 2.7967508079308235e-05, "loss": 0.8564, "step": 50450 }, { "epoch": 4.41, "learning_rate": 2.796314088566687e-05, "loss": 0.9507, "step": 50460 }, { "epoch": 4.41, "learning_rate": 2.7958773692025508e-05, "loss": 0.979, "step": 50470 }, { "epoch": 4.41, "learning_rate": 2.7954406498384138e-05, "loss": 0.7535, "step": 50480 }, { "epoch": 4.41, "learning_rate": 2.7950039304742775e-05, "loss": 0.928, "step": 50490 }, { "epoch": 4.41, "learning_rate": 2.7945672111101408e-05, "loss": 0.8172, "step": 50500 }, { "epoch": 4.41, "learning_rate": 2.7941304917460044e-05, "loss": 0.7476, "step": 50510 }, { "epoch": 4.41, "learning_rate": 2.7936937723818674e-05, "loss": 0.9157, "step": 50520 }, { "epoch": 4.41, "learning_rate": 2.793257053017731e-05, "loss": 0.933, "step": 50530 }, { "epoch": 4.41, "learning_rate": 2.792820333653594e-05, "loss": 0.9482, "step": 50540 }, { "epoch": 4.42, "learning_rate": 2.7923836142894577e-05, "loss": 0.969, "step": 50550 }, { "epoch": 4.42, "learning_rate": 2.7919468949253214e-05, "loss": 0.8968, "step": 50560 }, { "epoch": 4.42, "learning_rate": 2.7915101755611844e-05, "loss": 0.8897, "step": 50570 }, { "epoch": 4.42, "learning_rate": 2.791073456197048e-05, "loss": 0.8132, "step": 50580 }, { "epoch": 4.42, "learning_rate": 2.790636736832911e-05, "loss": 0.9695, "step": 50590 }, { "epoch": 4.42, "learning_rate": 2.7902000174687747e-05, "loss": 0.8795, "step": 50600 }, { "epoch": 4.42, "learning_rate": 2.789763298104638e-05, "loss": 0.9206, "step": 50610 }, { "epoch": 4.42, "learning_rate": 2.7893265787405016e-05, "loss": 0.9773, "step": 50620 }, { "epoch": 4.42, "learning_rate": 2.7888898593763653e-05, "loss": 0.9016, "step": 50630 }, { "epoch": 4.42, "learning_rate": 2.7884531400122283e-05, "loss": 0.8783, "step": 50640 }, { "epoch": 4.42, "learning_rate": 2.788016420648092e-05, "loss": 0.8445, "step": 50650 }, { "epoch": 4.42, "learning_rate": 2.787579701283955e-05, "loss": 0.8421, "step": 50660 }, { "epoch": 4.43, "learning_rate": 2.7871429819198186e-05, "loss": 0.8704, "step": 50670 }, { "epoch": 4.43, "learning_rate": 2.7867062625556816e-05, "loss": 0.9181, "step": 50680 }, { "epoch": 4.43, "learning_rate": 2.7862695431915452e-05, "loss": 0.9497, "step": 50690 }, { "epoch": 4.43, "learning_rate": 2.7858328238274085e-05, "loss": 0.973, "step": 50700 }, { "epoch": 4.43, "learning_rate": 2.7853961044632722e-05, "loss": 0.917, "step": 50710 }, { "epoch": 4.43, "learning_rate": 2.7849593850991355e-05, "loss": 0.951, "step": 50720 }, { "epoch": 4.43, "learning_rate": 2.784522665734999e-05, "loss": 0.9254, "step": 50730 }, { "epoch": 4.43, "learning_rate": 2.7840859463708625e-05, "loss": 0.8762, "step": 50740 }, { "epoch": 4.43, "learning_rate": 2.7836492270067255e-05, "loss": 0.9183, "step": 50750 }, { "epoch": 4.43, "learning_rate": 2.783212507642589e-05, "loss": 0.867, "step": 50760 }, { "epoch": 4.43, "learning_rate": 2.782775788278452e-05, "loss": 0.9603, "step": 50770 }, { "epoch": 4.44, "learning_rate": 2.7823390689143158e-05, "loss": 0.8531, "step": 50780 }, { "epoch": 4.44, "learning_rate": 2.7819023495501788e-05, "loss": 0.916, "step": 50790 }, { "epoch": 4.44, "learning_rate": 2.7814656301860424e-05, "loss": 0.8962, "step": 50800 }, { "epoch": 4.44, "learning_rate": 2.781028910821906e-05, "loss": 0.9269, "step": 50810 }, { "epoch": 4.44, "learning_rate": 2.7805921914577694e-05, "loss": 0.9167, "step": 50820 }, { "epoch": 4.44, "learning_rate": 2.780155472093633e-05, "loss": 0.8884, "step": 50830 }, { "epoch": 4.44, "learning_rate": 2.779718752729496e-05, "loss": 0.8476, "step": 50840 }, { "epoch": 4.44, "learning_rate": 2.7792820333653597e-05, "loss": 0.9615, "step": 50850 }, { "epoch": 4.44, "learning_rate": 2.7788453140012227e-05, "loss": 0.8374, "step": 50860 }, { "epoch": 4.44, "learning_rate": 2.7784085946370863e-05, "loss": 0.8967, "step": 50870 }, { "epoch": 4.44, "learning_rate": 2.77797187527295e-05, "loss": 0.8982, "step": 50880 }, { "epoch": 4.44, "learning_rate": 2.777535155908813e-05, "loss": 0.9548, "step": 50890 }, { "epoch": 4.45, "learning_rate": 2.7770984365446766e-05, "loss": 0.9519, "step": 50900 }, { "epoch": 4.45, "learning_rate": 2.77666171718054e-05, "loss": 0.8568, "step": 50910 }, { "epoch": 4.45, "learning_rate": 2.7762249978164033e-05, "loss": 0.8103, "step": 50920 }, { "epoch": 4.45, "learning_rate": 2.7757882784522666e-05, "loss": 0.8566, "step": 50930 }, { "epoch": 4.45, "learning_rate": 2.7753515590881303e-05, "loss": 0.8873, "step": 50940 }, { "epoch": 4.45, "learning_rate": 2.7749148397239932e-05, "loss": 0.883, "step": 50950 }, { "epoch": 4.45, "learning_rate": 2.774478120359857e-05, "loss": 0.8632, "step": 50960 }, { "epoch": 4.45, "learning_rate": 2.7740414009957206e-05, "loss": 0.9126, "step": 50970 }, { "epoch": 4.45, "learning_rate": 2.7736046816315835e-05, "loss": 0.8972, "step": 50980 }, { "epoch": 4.45, "learning_rate": 2.7731679622674472e-05, "loss": 0.9381, "step": 50990 }, { "epoch": 4.45, "learning_rate": 2.7727312429033102e-05, "loss": 0.8102, "step": 51000 }, { "epoch": 4.46, "learning_rate": 2.772294523539174e-05, "loss": 0.8851, "step": 51010 }, { "epoch": 4.46, "learning_rate": 2.771857804175037e-05, "loss": 0.8995, "step": 51020 }, { "epoch": 4.46, "learning_rate": 2.7714210848109008e-05, "loss": 0.9324, "step": 51030 }, { "epoch": 4.46, "learning_rate": 2.7709843654467645e-05, "loss": 0.9292, "step": 51040 }, { "epoch": 4.46, "learning_rate": 2.7705476460826275e-05, "loss": 0.7683, "step": 51050 }, { "epoch": 4.46, "learning_rate": 2.770110926718491e-05, "loss": 0.95, "step": 51060 }, { "epoch": 4.46, "learning_rate": 2.769674207354354e-05, "loss": 0.9039, "step": 51070 }, { "epoch": 4.46, "learning_rate": 2.7692374879902178e-05, "loss": 0.9339, "step": 51080 }, { "epoch": 4.46, "learning_rate": 2.7688007686260807e-05, "loss": 1.0771, "step": 51090 }, { "epoch": 4.46, "learning_rate": 2.7683640492619444e-05, "loss": 1.0357, "step": 51100 }, { "epoch": 4.46, "learning_rate": 2.7679273298978077e-05, "loss": 0.8968, "step": 51110 }, { "epoch": 4.47, "learning_rate": 2.767490610533671e-05, "loss": 0.9715, "step": 51120 }, { "epoch": 4.47, "learning_rate": 2.7670538911695347e-05, "loss": 0.8875, "step": 51130 }, { "epoch": 4.47, "learning_rate": 2.766617171805398e-05, "loss": 0.8234, "step": 51140 }, { "epoch": 4.47, "learning_rate": 2.7661804524412617e-05, "loss": 0.8381, "step": 51150 }, { "epoch": 4.47, "learning_rate": 2.7657437330771247e-05, "loss": 0.9416, "step": 51160 }, { "epoch": 4.47, "learning_rate": 2.7653070137129883e-05, "loss": 0.8877, "step": 51170 }, { "epoch": 4.47, "learning_rate": 2.7648702943488513e-05, "loss": 0.9444, "step": 51180 }, { "epoch": 4.47, "learning_rate": 2.764433574984715e-05, "loss": 0.9283, "step": 51190 }, { "epoch": 4.47, "learning_rate": 2.763996855620578e-05, "loss": 0.8619, "step": 51200 }, { "epoch": 4.47, "learning_rate": 2.7635601362564416e-05, "loss": 0.945, "step": 51210 }, { "epoch": 4.47, "learning_rate": 2.7631234168923053e-05, "loss": 0.9075, "step": 51220 }, { "epoch": 4.47, "learning_rate": 2.7626866975281686e-05, "loss": 0.9403, "step": 51230 }, { "epoch": 4.48, "learning_rate": 2.7622499781640322e-05, "loss": 0.7913, "step": 51240 }, { "epoch": 4.48, "learning_rate": 2.7618132587998952e-05, "loss": 0.8673, "step": 51250 }, { "epoch": 4.48, "learning_rate": 2.761376539435759e-05, "loss": 0.7985, "step": 51260 }, { "epoch": 4.48, "learning_rate": 2.760939820071622e-05, "loss": 0.8848, "step": 51270 }, { "epoch": 4.48, "learning_rate": 2.7605031007074855e-05, "loss": 0.9118, "step": 51280 }, { "epoch": 4.48, "learning_rate": 2.7600663813433492e-05, "loss": 0.9504, "step": 51290 }, { "epoch": 4.48, "learning_rate": 2.759629661979212e-05, "loss": 0.9089, "step": 51300 }, { "epoch": 4.48, "learning_rate": 2.7591929426150758e-05, "loss": 0.8518, "step": 51310 }, { "epoch": 4.48, "learning_rate": 2.7587562232509388e-05, "loss": 0.8427, "step": 51320 }, { "epoch": 4.48, "learning_rate": 2.7583195038868025e-05, "loss": 0.9588, "step": 51330 }, { "epoch": 4.48, "learning_rate": 2.7578827845226658e-05, "loss": 0.8075, "step": 51340 }, { "epoch": 4.49, "learning_rate": 2.7574460651585294e-05, "loss": 0.9166, "step": 51350 }, { "epoch": 4.49, "learning_rate": 2.7570093457943924e-05, "loss": 0.7756, "step": 51360 }, { "epoch": 4.49, "learning_rate": 2.756572626430256e-05, "loss": 0.9401, "step": 51370 }, { "epoch": 4.49, "learning_rate": 2.7561359070661197e-05, "loss": 0.9293, "step": 51380 }, { "epoch": 4.49, "learning_rate": 2.7556991877019827e-05, "loss": 0.7768, "step": 51390 }, { "epoch": 4.49, "learning_rate": 2.7552624683378464e-05, "loss": 0.8161, "step": 51400 }, { "epoch": 4.49, "learning_rate": 2.7548257489737094e-05, "loss": 0.8559, "step": 51410 }, { "epoch": 4.49, "learning_rate": 2.754389029609573e-05, "loss": 0.9159, "step": 51420 }, { "epoch": 4.49, "learning_rate": 2.7539523102454363e-05, "loss": 0.8039, "step": 51430 }, { "epoch": 4.49, "learning_rate": 2.7535155908813e-05, "loss": 0.8494, "step": 51440 }, { "epoch": 4.49, "learning_rate": 2.753078871517163e-05, "loss": 0.8639, "step": 51450 }, { "epoch": 4.49, "learning_rate": 2.7526421521530266e-05, "loss": 0.8333, "step": 51460 }, { "epoch": 4.5, "learning_rate": 2.7522054327888903e-05, "loss": 0.8136, "step": 51470 }, { "epoch": 4.5, "learning_rate": 2.7517687134247533e-05, "loss": 0.9056, "step": 51480 }, { "epoch": 4.5, "learning_rate": 2.751331994060617e-05, "loss": 0.8374, "step": 51490 }, { "epoch": 4.5, "learning_rate": 2.75089527469648e-05, "loss": 0.8592, "step": 51500 }, { "epoch": 4.5, "learning_rate": 2.7504585553323436e-05, "loss": 0.8341, "step": 51510 }, { "epoch": 4.5, "learning_rate": 2.7500218359682066e-05, "loss": 0.9611, "step": 51520 }, { "epoch": 4.5, "learning_rate": 2.7495851166040702e-05, "loss": 0.9043, "step": 51530 }, { "epoch": 4.5, "learning_rate": 2.749148397239934e-05, "loss": 0.8977, "step": 51540 }, { "epoch": 4.5, "learning_rate": 2.7487116778757972e-05, "loss": 0.8262, "step": 51550 }, { "epoch": 4.5, "learning_rate": 2.748274958511661e-05, "loss": 0.8775, "step": 51560 }, { "epoch": 4.5, "learning_rate": 2.747838239147524e-05, "loss": 0.8196, "step": 51570 }, { "epoch": 4.51, "learning_rate": 2.7474015197833875e-05, "loss": 0.9007, "step": 51580 }, { "epoch": 4.51, "learning_rate": 2.7469648004192505e-05, "loss": 0.9572, "step": 51590 }, { "epoch": 4.51, "learning_rate": 2.746528081055114e-05, "loss": 0.7817, "step": 51600 }, { "epoch": 4.51, "learning_rate": 2.746091361690977e-05, "loss": 0.9555, "step": 51610 }, { "epoch": 4.51, "learning_rate": 2.7456546423268408e-05, "loss": 0.8681, "step": 51620 }, { "epoch": 4.51, "learning_rate": 2.7452179229627044e-05, "loss": 0.8276, "step": 51630 }, { "epoch": 4.51, "learning_rate": 2.7447812035985678e-05, "loss": 0.8859, "step": 51640 }, { "epoch": 4.51, "learning_rate": 2.744344484234431e-05, "loss": 0.9043, "step": 51650 }, { "epoch": 4.51, "learning_rate": 2.7439077648702944e-05, "loss": 0.7266, "step": 51660 }, { "epoch": 4.51, "learning_rate": 2.743471045506158e-05, "loss": 0.8393, "step": 51670 }, { "epoch": 4.51, "learning_rate": 2.743034326142021e-05, "loss": 0.9375, "step": 51680 }, { "epoch": 4.51, "learning_rate": 2.7425976067778847e-05, "loss": 0.8681, "step": 51690 }, { "epoch": 4.52, "learning_rate": 2.7421608874137484e-05, "loss": 0.8904, "step": 51700 }, { "epoch": 4.52, "learning_rate": 2.7417241680496113e-05, "loss": 0.8502, "step": 51710 }, { "epoch": 4.52, "learning_rate": 2.741287448685475e-05, "loss": 0.9921, "step": 51720 }, { "epoch": 4.52, "learning_rate": 2.740850729321338e-05, "loss": 0.9038, "step": 51730 }, { "epoch": 4.52, "learning_rate": 2.7404140099572016e-05, "loss": 0.8742, "step": 51740 }, { "epoch": 4.52, "learning_rate": 2.739977290593065e-05, "loss": 0.9502, "step": 51750 }, { "epoch": 4.52, "learning_rate": 2.7395405712289286e-05, "loss": 0.8112, "step": 51760 }, { "epoch": 4.52, "learning_rate": 2.7391038518647916e-05, "loss": 1.0889, "step": 51770 }, { "epoch": 4.52, "learning_rate": 2.7386671325006553e-05, "loss": 0.8425, "step": 51780 }, { "epoch": 4.52, "learning_rate": 2.738230413136519e-05, "loss": 0.9371, "step": 51790 }, { "epoch": 4.52, "learning_rate": 2.737793693772382e-05, "loss": 0.8988, "step": 51800 }, { "epoch": 4.53, "learning_rate": 2.7373569744082456e-05, "loss": 0.9163, "step": 51810 }, { "epoch": 4.53, "learning_rate": 2.7369202550441085e-05, "loss": 0.8851, "step": 51820 }, { "epoch": 4.53, "learning_rate": 2.7364835356799722e-05, "loss": 0.9373, "step": 51830 }, { "epoch": 4.53, "learning_rate": 2.7360468163158355e-05, "loss": 0.8972, "step": 51840 }, { "epoch": 4.53, "learning_rate": 2.735610096951699e-05, "loss": 0.9224, "step": 51850 }, { "epoch": 4.53, "learning_rate": 2.735173377587562e-05, "loss": 0.7813, "step": 51860 }, { "epoch": 4.53, "learning_rate": 2.7347366582234258e-05, "loss": 0.9362, "step": 51870 }, { "epoch": 4.53, "learning_rate": 2.7342999388592895e-05, "loss": 0.8219, "step": 51880 }, { "epoch": 4.53, "learning_rate": 2.7338632194951525e-05, "loss": 0.9248, "step": 51890 }, { "epoch": 4.53, "learning_rate": 2.733426500131016e-05, "loss": 0.7997, "step": 51900 }, { "epoch": 4.53, "learning_rate": 2.732989780766879e-05, "loss": 0.9692, "step": 51910 }, { "epoch": 4.53, "learning_rate": 2.7325530614027428e-05, "loss": 0.9046, "step": 51920 }, { "epoch": 4.54, "learning_rate": 2.7321163420386057e-05, "loss": 1.0248, "step": 51930 }, { "epoch": 4.54, "learning_rate": 2.7316796226744694e-05, "loss": 0.9149, "step": 51940 }, { "epoch": 4.54, "learning_rate": 2.731242903310333e-05, "loss": 0.8752, "step": 51950 }, { "epoch": 4.54, "learning_rate": 2.7308061839461964e-05, "loss": 0.9454, "step": 51960 }, { "epoch": 4.54, "learning_rate": 2.73036946458206e-05, "loss": 0.7986, "step": 51970 }, { "epoch": 4.54, "learning_rate": 2.729932745217923e-05, "loss": 0.8016, "step": 51980 }, { "epoch": 4.54, "learning_rate": 2.7294960258537867e-05, "loss": 0.9075, "step": 51990 }, { "epoch": 4.54, "learning_rate": 2.7290593064896497e-05, "loss": 0.8728, "step": 52000 }, { "epoch": 4.54, "learning_rate": 2.7286225871255133e-05, "loss": 0.8521, "step": 52010 }, { "epoch": 4.54, "learning_rate": 2.7281858677613763e-05, "loss": 0.9122, "step": 52020 }, { "epoch": 4.54, "learning_rate": 2.72774914839724e-05, "loss": 0.9481, "step": 52030 }, { "epoch": 4.55, "learning_rate": 2.7273124290331036e-05, "loss": 0.8684, "step": 52040 }, { "epoch": 4.55, "learning_rate": 2.726875709668967e-05, "loss": 0.938, "step": 52050 }, { "epoch": 4.55, "learning_rate": 2.7264389903048303e-05, "loss": 0.9949, "step": 52060 }, { "epoch": 4.55, "learning_rate": 2.7260022709406936e-05, "loss": 0.8675, "step": 52070 }, { "epoch": 4.55, "learning_rate": 2.7255655515765572e-05, "loss": 0.7786, "step": 52080 }, { "epoch": 4.55, "learning_rate": 2.7251288322124202e-05, "loss": 0.8871, "step": 52090 }, { "epoch": 4.55, "learning_rate": 2.724692112848284e-05, "loss": 0.8559, "step": 52100 }, { "epoch": 4.55, "learning_rate": 2.7242553934841475e-05, "loss": 1.0457, "step": 52110 }, { "epoch": 4.55, "learning_rate": 2.7238186741200105e-05, "loss": 0.7573, "step": 52120 }, { "epoch": 4.55, "learning_rate": 2.7233819547558742e-05, "loss": 0.8979, "step": 52130 }, { "epoch": 4.55, "learning_rate": 2.722945235391737e-05, "loss": 0.8907, "step": 52140 }, { "epoch": 4.55, "learning_rate": 2.7225085160276008e-05, "loss": 0.8361, "step": 52150 }, { "epoch": 4.56, "learning_rate": 2.722071796663464e-05, "loss": 0.8802, "step": 52160 }, { "epoch": 4.56, "learning_rate": 2.7216350772993278e-05, "loss": 0.8554, "step": 52170 }, { "epoch": 4.56, "learning_rate": 2.7211983579351908e-05, "loss": 0.7859, "step": 52180 }, { "epoch": 4.56, "learning_rate": 2.7207616385710544e-05, "loss": 0.9052, "step": 52190 }, { "epoch": 4.56, "learning_rate": 2.720324919206918e-05, "loss": 0.8597, "step": 52200 }, { "epoch": 4.56, "learning_rate": 2.719888199842781e-05, "loss": 1.0035, "step": 52210 }, { "epoch": 4.56, "learning_rate": 2.7194514804786447e-05, "loss": 0.7342, "step": 52220 }, { "epoch": 4.56, "learning_rate": 2.7190147611145077e-05, "loss": 0.7895, "step": 52230 }, { "epoch": 4.56, "learning_rate": 2.7185780417503714e-05, "loss": 0.9391, "step": 52240 }, { "epoch": 4.56, "learning_rate": 2.7181413223862347e-05, "loss": 0.9222, "step": 52250 }, { "epoch": 4.56, "learning_rate": 2.717704603022098e-05, "loss": 0.9295, "step": 52260 }, { "epoch": 4.57, "learning_rate": 2.7172678836579613e-05, "loss": 0.9107, "step": 52270 }, { "epoch": 4.57, "learning_rate": 2.716831164293825e-05, "loss": 0.8797, "step": 52280 }, { "epoch": 4.57, "learning_rate": 2.7163944449296887e-05, "loss": 0.966, "step": 52290 }, { "epoch": 4.57, "learning_rate": 2.7159577255655516e-05, "loss": 0.8219, "step": 52300 }, { "epoch": 4.57, "learning_rate": 2.7155210062014153e-05, "loss": 0.7521, "step": 52310 }, { "epoch": 4.57, "learning_rate": 2.7150842868372783e-05, "loss": 0.7858, "step": 52320 }, { "epoch": 4.57, "learning_rate": 2.714647567473142e-05, "loss": 0.9119, "step": 52330 }, { "epoch": 4.57, "learning_rate": 2.714210848109005e-05, "loss": 0.7657, "step": 52340 }, { "epoch": 4.57, "learning_rate": 2.7137741287448686e-05, "loss": 0.9876, "step": 52350 }, { "epoch": 4.57, "learning_rate": 2.7133374093807322e-05, "loss": 0.8559, "step": 52360 }, { "epoch": 4.57, "learning_rate": 2.7129006900165956e-05, "loss": 1.1137, "step": 52370 }, { "epoch": 4.58, "learning_rate": 2.7124639706524592e-05, "loss": 0.7865, "step": 52380 }, { "epoch": 4.58, "learning_rate": 2.7120272512883222e-05, "loss": 0.8612, "step": 52390 }, { "epoch": 4.58, "learning_rate": 2.711590531924186e-05, "loss": 0.8971, "step": 52400 }, { "epoch": 4.58, "learning_rate": 2.711153812560049e-05, "loss": 0.9654, "step": 52410 }, { "epoch": 4.58, "learning_rate": 2.7107170931959125e-05, "loss": 0.9357, "step": 52420 }, { "epoch": 4.58, "learning_rate": 2.7102803738317755e-05, "loss": 0.8314, "step": 52430 }, { "epoch": 4.58, "learning_rate": 2.709843654467639e-05, "loss": 0.966, "step": 52440 }, { "epoch": 4.58, "learning_rate": 2.7094069351035028e-05, "loss": 0.7626, "step": 52450 }, { "epoch": 4.58, "learning_rate": 2.7089702157393658e-05, "loss": 0.8894, "step": 52460 }, { "epoch": 4.58, "learning_rate": 2.7085334963752294e-05, "loss": 0.9412, "step": 52470 }, { "epoch": 4.58, "learning_rate": 2.7080967770110928e-05, "loss": 0.8546, "step": 52480 }, { "epoch": 4.58, "learning_rate": 2.7076600576469564e-05, "loss": 0.842, "step": 52490 }, { "epoch": 4.59, "learning_rate": 2.7072233382828194e-05, "loss": 0.9878, "step": 52500 }, { "epoch": 4.59, "learning_rate": 2.706786618918683e-05, "loss": 0.8086, "step": 52510 }, { "epoch": 4.59, "learning_rate": 2.7063498995545467e-05, "loss": 0.856, "step": 52520 }, { "epoch": 4.59, "learning_rate": 2.7059131801904097e-05, "loss": 0.924, "step": 52530 }, { "epoch": 4.59, "learning_rate": 2.7054764608262734e-05, "loss": 0.8868, "step": 52540 }, { "epoch": 4.59, "learning_rate": 2.7050397414621363e-05, "loss": 0.8841, "step": 52550 }, { "epoch": 4.59, "learning_rate": 2.704603022098e-05, "loss": 0.9149, "step": 52560 }, { "epoch": 4.59, "learning_rate": 2.7041663027338633e-05, "loss": 0.7891, "step": 52570 }, { "epoch": 4.59, "learning_rate": 2.703729583369727e-05, "loss": 0.8058, "step": 52580 }, { "epoch": 4.59, "learning_rate": 2.70329286400559e-05, "loss": 0.8186, "step": 52590 }, { "epoch": 4.59, "learning_rate": 2.7028561446414536e-05, "loss": 0.8894, "step": 52600 }, { "epoch": 4.6, "learning_rate": 2.7024194252773173e-05, "loss": 0.7892, "step": 52610 }, { "epoch": 4.6, "learning_rate": 2.7019827059131803e-05, "loss": 0.8098, "step": 52620 }, { "epoch": 4.6, "learning_rate": 2.701545986549044e-05, "loss": 0.9148, "step": 52630 }, { "epoch": 4.6, "learning_rate": 2.701109267184907e-05, "loss": 0.8191, "step": 52640 }, { "epoch": 4.6, "learning_rate": 2.7006725478207706e-05, "loss": 0.7754, "step": 52650 }, { "epoch": 4.6, "learning_rate": 2.7002358284566335e-05, "loss": 0.7728, "step": 52660 }, { "epoch": 4.6, "learning_rate": 2.6997991090924972e-05, "loss": 0.9807, "step": 52670 }, { "epoch": 4.6, "learning_rate": 2.6993623897283605e-05, "loss": 0.9085, "step": 52680 }, { "epoch": 4.6, "learning_rate": 2.6989256703642242e-05, "loss": 0.8713, "step": 52690 }, { "epoch": 4.6, "learning_rate": 2.698488951000088e-05, "loss": 0.7404, "step": 52700 }, { "epoch": 4.6, "learning_rate": 2.6980522316359508e-05, "loss": 0.8872, "step": 52710 }, { "epoch": 4.6, "learning_rate": 2.6976155122718145e-05, "loss": 0.9434, "step": 52720 }, { "epoch": 4.61, "learning_rate": 2.6971787929076775e-05, "loss": 0.9217, "step": 52730 }, { "epoch": 4.61, "learning_rate": 2.696742073543541e-05, "loss": 0.7396, "step": 52740 }, { "epoch": 4.61, "learning_rate": 2.696305354179404e-05, "loss": 0.8877, "step": 52750 }, { "epoch": 4.61, "learning_rate": 2.6958686348152678e-05, "loss": 0.9327, "step": 52760 }, { "epoch": 4.61, "learning_rate": 2.6954319154511314e-05, "loss": 0.8127, "step": 52770 }, { "epoch": 4.61, "learning_rate": 2.6949951960869947e-05, "loss": 0.729, "step": 52780 }, { "epoch": 4.61, "learning_rate": 2.694558476722858e-05, "loss": 0.8129, "step": 52790 }, { "epoch": 4.61, "learning_rate": 2.6941217573587214e-05, "loss": 0.8797, "step": 52800 }, { "epoch": 4.61, "learning_rate": 2.693685037994585e-05, "loss": 0.8668, "step": 52810 }, { "epoch": 4.61, "learning_rate": 2.693248318630448e-05, "loss": 0.9437, "step": 52820 }, { "epoch": 4.61, "learning_rate": 2.6928115992663117e-05, "loss": 0.8015, "step": 52830 }, { "epoch": 4.62, "learning_rate": 2.6923748799021747e-05, "loss": 0.9317, "step": 52840 }, { "epoch": 4.62, "learning_rate": 2.6919381605380383e-05, "loss": 0.8502, "step": 52850 }, { "epoch": 4.62, "learning_rate": 2.691501441173902e-05, "loss": 0.9412, "step": 52860 }, { "epoch": 4.62, "learning_rate": 2.691064721809765e-05, "loss": 0.8161, "step": 52870 }, { "epoch": 4.62, "learning_rate": 2.6906280024456286e-05, "loss": 0.9253, "step": 52880 }, { "epoch": 4.62, "learning_rate": 2.690191283081492e-05, "loss": 0.7961, "step": 52890 }, { "epoch": 4.62, "learning_rate": 2.6897545637173556e-05, "loss": 0.8466, "step": 52900 }, { "epoch": 4.62, "learning_rate": 2.6893178443532186e-05, "loss": 0.8335, "step": 52910 }, { "epoch": 4.62, "learning_rate": 2.6888811249890822e-05, "loss": 0.8533, "step": 52920 }, { "epoch": 4.62, "learning_rate": 2.6884444056249452e-05, "loss": 0.9839, "step": 52930 }, { "epoch": 4.62, "learning_rate": 2.688007686260809e-05, "loss": 0.7459, "step": 52940 }, { "epoch": 4.62, "learning_rate": 2.6875709668966725e-05, "loss": 0.8996, "step": 52950 }, { "epoch": 4.63, "learning_rate": 2.6871342475325355e-05, "loss": 0.7904, "step": 52960 }, { "epoch": 4.63, "learning_rate": 2.6866975281683992e-05, "loss": 0.8301, "step": 52970 }, { "epoch": 4.63, "learning_rate": 2.6862608088042625e-05, "loss": 1.0082, "step": 52980 }, { "epoch": 4.63, "learning_rate": 2.6858240894401258e-05, "loss": 0.7975, "step": 52990 }, { "epoch": 4.63, "learning_rate": 2.685387370075989e-05, "loss": 0.9125, "step": 53000 }, { "epoch": 4.63, "learning_rate": 2.6849506507118528e-05, "loss": 0.8622, "step": 53010 }, { "epoch": 4.63, "learning_rate": 2.6845139313477165e-05, "loss": 0.7723, "step": 53020 }, { "epoch": 4.63, "learning_rate": 2.6840772119835794e-05, "loss": 0.9523, "step": 53030 }, { "epoch": 4.63, "learning_rate": 2.683640492619443e-05, "loss": 0.8004, "step": 53040 }, { "epoch": 4.63, "learning_rate": 2.683203773255306e-05, "loss": 0.8701, "step": 53050 }, { "epoch": 4.63, "learning_rate": 2.6827670538911697e-05, "loss": 0.9496, "step": 53060 }, { "epoch": 4.64, "learning_rate": 2.6823303345270327e-05, "loss": 0.8708, "step": 53070 }, { "epoch": 4.64, "learning_rate": 2.6818936151628964e-05, "loss": 0.8691, "step": 53080 }, { "epoch": 4.64, "learning_rate": 2.6814568957987597e-05, "loss": 0.9233, "step": 53090 }, { "epoch": 4.64, "learning_rate": 2.6810201764346234e-05, "loss": 0.8821, "step": 53100 }, { "epoch": 4.64, "learning_rate": 2.680583457070487e-05, "loss": 0.8986, "step": 53110 }, { "epoch": 4.64, "learning_rate": 2.68014673770635e-05, "loss": 0.9208, "step": 53120 }, { "epoch": 4.64, "learning_rate": 2.6797100183422137e-05, "loss": 0.8794, "step": 53130 }, { "epoch": 4.64, "learning_rate": 2.6792732989780766e-05, "loss": 0.8729, "step": 53140 }, { "epoch": 4.64, "learning_rate": 2.6788365796139403e-05, "loss": 0.9327, "step": 53150 }, { "epoch": 4.64, "learning_rate": 2.6783998602498033e-05, "loss": 0.9202, "step": 53160 }, { "epoch": 4.64, "learning_rate": 2.677963140885667e-05, "loss": 0.8297, "step": 53170 }, { "epoch": 4.64, "learning_rate": 2.6775264215215306e-05, "loss": 0.9369, "step": 53180 }, { "epoch": 4.65, "learning_rate": 2.6770897021573936e-05, "loss": 0.9092, "step": 53190 }, { "epoch": 4.65, "learning_rate": 2.6766529827932572e-05, "loss": 0.8708, "step": 53200 }, { "epoch": 4.65, "learning_rate": 2.6762162634291206e-05, "loss": 0.7167, "step": 53210 }, { "epoch": 4.65, "learning_rate": 2.6757795440649842e-05, "loss": 0.9535, "step": 53220 }, { "epoch": 4.65, "learning_rate": 2.6753428247008472e-05, "loss": 0.7718, "step": 53230 }, { "epoch": 4.65, "learning_rate": 2.674906105336711e-05, "loss": 0.9555, "step": 53240 }, { "epoch": 4.65, "learning_rate": 2.674469385972574e-05, "loss": 0.9535, "step": 53250 }, { "epoch": 4.65, "learning_rate": 2.6740326666084375e-05, "loss": 0.9096, "step": 53260 }, { "epoch": 4.65, "learning_rate": 2.673595947244301e-05, "loss": 0.8353, "step": 53270 }, { "epoch": 4.65, "learning_rate": 2.673159227880164e-05, "loss": 0.947, "step": 53280 }, { "epoch": 4.65, "learning_rate": 2.6727225085160278e-05, "loss": 0.8138, "step": 53290 }, { "epoch": 4.66, "learning_rate": 2.672285789151891e-05, "loss": 0.8447, "step": 53300 }, { "epoch": 4.66, "learning_rate": 2.6718490697877548e-05, "loss": 0.8711, "step": 53310 }, { "epoch": 4.66, "learning_rate": 2.6714123504236178e-05, "loss": 0.9579, "step": 53320 }, { "epoch": 4.66, "learning_rate": 2.6709756310594814e-05, "loss": 0.7544, "step": 53330 }, { "epoch": 4.66, "learning_rate": 2.6705389116953444e-05, "loss": 0.7646, "step": 53340 }, { "epoch": 4.66, "learning_rate": 2.670102192331208e-05, "loss": 0.8858, "step": 53350 }, { "epoch": 4.66, "learning_rate": 2.6696654729670717e-05, "loss": 0.7818, "step": 53360 }, { "epoch": 4.66, "learning_rate": 2.6692287536029347e-05, "loss": 1.0221, "step": 53370 }, { "epoch": 4.66, "learning_rate": 2.6687920342387984e-05, "loss": 0.9097, "step": 53380 }, { "epoch": 4.66, "learning_rate": 2.6683553148746613e-05, "loss": 1.0267, "step": 53390 }, { "epoch": 4.66, "learning_rate": 2.667918595510525e-05, "loss": 0.9069, "step": 53400 }, { "epoch": 4.67, "learning_rate": 2.6674818761463883e-05, "loss": 0.8845, "step": 53410 }, { "epoch": 4.67, "learning_rate": 2.667045156782252e-05, "loss": 0.933, "step": 53420 }, { "epoch": 4.67, "learning_rate": 2.6666084374181156e-05, "loss": 0.8502, "step": 53430 }, { "epoch": 4.67, "learning_rate": 2.6661717180539786e-05, "loss": 0.902, "step": 53440 }, { "epoch": 4.67, "learning_rate": 2.6657349986898423e-05, "loss": 0.8823, "step": 53450 }, { "epoch": 4.67, "learning_rate": 2.6652982793257053e-05, "loss": 0.8058, "step": 53460 }, { "epoch": 4.67, "learning_rate": 2.664861559961569e-05, "loss": 0.9272, "step": 53470 }, { "epoch": 4.67, "learning_rate": 2.664424840597432e-05, "loss": 0.799, "step": 53480 }, { "epoch": 4.67, "learning_rate": 2.6639881212332956e-05, "loss": 0.9084, "step": 53490 }, { "epoch": 4.67, "learning_rate": 2.663551401869159e-05, "loss": 0.8274, "step": 53500 }, { "epoch": 4.67, "learning_rate": 2.6631146825050225e-05, "loss": 0.8546, "step": 53510 }, { "epoch": 4.67, "learning_rate": 2.662677963140886e-05, "loss": 1.006, "step": 53520 }, { "epoch": 4.68, "learning_rate": 2.6622412437767492e-05, "loss": 0.8003, "step": 53530 }, { "epoch": 4.68, "learning_rate": 2.661804524412613e-05, "loss": 0.8837, "step": 53540 }, { "epoch": 4.68, "learning_rate": 2.6613678050484758e-05, "loss": 0.8675, "step": 53550 }, { "epoch": 4.68, "learning_rate": 2.6609310856843395e-05, "loss": 0.8624, "step": 53560 }, { "epoch": 4.68, "learning_rate": 2.6604943663202025e-05, "loss": 0.8535, "step": 53570 }, { "epoch": 4.68, "learning_rate": 2.660057646956066e-05, "loss": 0.9052, "step": 53580 }, { "epoch": 4.68, "learning_rate": 2.6596209275919298e-05, "loss": 0.7522, "step": 53590 }, { "epoch": 4.68, "learning_rate": 2.6591842082277928e-05, "loss": 0.8664, "step": 53600 }, { "epoch": 4.68, "learning_rate": 2.6587474888636564e-05, "loss": 0.8576, "step": 53610 }, { "epoch": 4.68, "learning_rate": 2.6583107694995197e-05, "loss": 1.0703, "step": 53620 }, { "epoch": 4.68, "learning_rate": 2.6578740501353834e-05, "loss": 0.842, "step": 53630 }, { "epoch": 4.69, "learning_rate": 2.6574373307712464e-05, "loss": 1.011, "step": 53640 }, { "epoch": 4.69, "learning_rate": 2.65700061140711e-05, "loss": 0.9372, "step": 53650 }, { "epoch": 4.69, "learning_rate": 2.656563892042973e-05, "loss": 0.8529, "step": 53660 }, { "epoch": 4.69, "learning_rate": 2.6561271726788367e-05, "loss": 0.7906, "step": 53670 }, { "epoch": 4.69, "learning_rate": 2.6556904533147003e-05, "loss": 0.7929, "step": 53680 }, { "epoch": 4.69, "learning_rate": 2.6552537339505633e-05, "loss": 0.8368, "step": 53690 }, { "epoch": 4.69, "learning_rate": 2.654817014586427e-05, "loss": 0.8592, "step": 53700 }, { "epoch": 4.69, "learning_rate": 2.6543802952222903e-05, "loss": 0.901, "step": 53710 }, { "epoch": 4.69, "learning_rate": 2.6539435758581536e-05, "loss": 0.9135, "step": 53720 }, { "epoch": 4.69, "learning_rate": 2.653506856494017e-05, "loss": 0.7913, "step": 53730 }, { "epoch": 4.69, "learning_rate": 2.6530701371298806e-05, "loss": 0.9645, "step": 53740 }, { "epoch": 4.69, "learning_rate": 2.6526334177657436e-05, "loss": 0.9677, "step": 53750 }, { "epoch": 4.7, "learning_rate": 2.6521966984016072e-05, "loss": 0.8512, "step": 53760 }, { "epoch": 4.7, "learning_rate": 2.651759979037471e-05, "loss": 0.824, "step": 53770 }, { "epoch": 4.7, "learning_rate": 2.651323259673334e-05, "loss": 0.9587, "step": 53780 }, { "epoch": 4.7, "learning_rate": 2.6508865403091975e-05, "loss": 0.9028, "step": 53790 }, { "epoch": 4.7, "learning_rate": 2.6504498209450605e-05, "loss": 0.874, "step": 53800 }, { "epoch": 4.7, "learning_rate": 2.6500131015809242e-05, "loss": 0.759, "step": 53810 }, { "epoch": 4.7, "learning_rate": 2.6495763822167875e-05, "loss": 0.7826, "step": 53820 }, { "epoch": 4.7, "learning_rate": 2.649139662852651e-05, "loss": 1.001, "step": 53830 }, { "epoch": 4.7, "learning_rate": 2.6487029434885148e-05, "loss": 0.9173, "step": 53840 }, { "epoch": 4.7, "learning_rate": 2.6482662241243778e-05, "loss": 0.8377, "step": 53850 }, { "epoch": 4.7, "learning_rate": 2.6478295047602415e-05, "loss": 0.934, "step": 53860 }, { "epoch": 4.71, "learning_rate": 2.6473927853961044e-05, "loss": 0.9216, "step": 53870 }, { "epoch": 4.71, "learning_rate": 2.646956066031968e-05, "loss": 0.9115, "step": 53880 }, { "epoch": 4.71, "learning_rate": 2.646519346667831e-05, "loss": 0.9941, "step": 53890 }, { "epoch": 4.71, "learning_rate": 2.6460826273036947e-05, "loss": 0.9608, "step": 53900 }, { "epoch": 4.71, "learning_rate": 2.645645907939558e-05, "loss": 0.9029, "step": 53910 }, { "epoch": 4.71, "learning_rate": 2.6452091885754214e-05, "loss": 0.9562, "step": 53920 }, { "epoch": 4.71, "learning_rate": 2.644772469211285e-05, "loss": 0.9145, "step": 53930 }, { "epoch": 4.71, "learning_rate": 2.6443357498471484e-05, "loss": 0.843, "step": 53940 }, { "epoch": 4.71, "learning_rate": 2.643899030483012e-05, "loss": 0.8519, "step": 53950 }, { "epoch": 4.71, "learning_rate": 2.643462311118875e-05, "loss": 0.8886, "step": 53960 }, { "epoch": 4.71, "learning_rate": 2.6430255917547387e-05, "loss": 0.9448, "step": 53970 }, { "epoch": 4.71, "learning_rate": 2.6425888723906016e-05, "loss": 0.8815, "step": 53980 }, { "epoch": 4.72, "learning_rate": 2.6421521530264653e-05, "loss": 0.817, "step": 53990 }, { "epoch": 4.72, "learning_rate": 2.641715433662329e-05, "loss": 0.9068, "step": 54000 }, { "epoch": 4.72, "learning_rate": 2.641278714298192e-05, "loss": 0.9485, "step": 54010 }, { "epoch": 4.72, "learning_rate": 2.6408419949340556e-05, "loss": 0.8368, "step": 54020 }, { "epoch": 4.72, "learning_rate": 2.640405275569919e-05, "loss": 0.8676, "step": 54030 }, { "epoch": 4.72, "learning_rate": 2.6399685562057826e-05, "loss": 0.9726, "step": 54040 }, { "epoch": 4.72, "learning_rate": 2.6395318368416456e-05, "loss": 0.7276, "step": 54050 }, { "epoch": 4.72, "learning_rate": 2.6390951174775092e-05, "loss": 0.7817, "step": 54060 }, { "epoch": 4.72, "learning_rate": 2.6386583981133722e-05, "loss": 0.8071, "step": 54070 }, { "epoch": 4.72, "learning_rate": 2.638221678749236e-05, "loss": 0.9776, "step": 54080 }, { "epoch": 4.72, "learning_rate": 2.6377849593850995e-05, "loss": 0.8592, "step": 54090 }, { "epoch": 4.73, "learning_rate": 2.6373482400209625e-05, "loss": 0.8626, "step": 54100 }, { "epoch": 4.73, "learning_rate": 2.636911520656826e-05, "loss": 0.8993, "step": 54110 }, { "epoch": 4.73, "learning_rate": 2.636474801292689e-05, "loss": 0.8891, "step": 54120 }, { "epoch": 4.73, "learning_rate": 2.6360380819285528e-05, "loss": 0.9491, "step": 54130 }, { "epoch": 4.73, "learning_rate": 2.635601362564416e-05, "loss": 0.8439, "step": 54140 }, { "epoch": 4.73, "learning_rate": 2.6351646432002798e-05, "loss": 0.7577, "step": 54150 }, { "epoch": 4.73, "learning_rate": 2.6347279238361428e-05, "loss": 0.8553, "step": 54160 }, { "epoch": 4.73, "learning_rate": 2.6342912044720064e-05, "loss": 0.8841, "step": 54170 }, { "epoch": 4.73, "learning_rate": 2.63385448510787e-05, "loss": 0.8718, "step": 54180 }, { "epoch": 4.73, "learning_rate": 2.633417765743733e-05, "loss": 0.9157, "step": 54190 }, { "epoch": 4.73, "learning_rate": 2.6329810463795967e-05, "loss": 0.8258, "step": 54200 }, { "epoch": 4.73, "learning_rate": 2.6325443270154597e-05, "loss": 0.9284, "step": 54210 }, { "epoch": 4.74, "learning_rate": 2.6321076076513234e-05, "loss": 0.8706, "step": 54220 }, { "epoch": 4.74, "learning_rate": 2.6316708882871867e-05, "loss": 0.8038, "step": 54230 }, { "epoch": 4.74, "learning_rate": 2.6312341689230503e-05, "loss": 0.7496, "step": 54240 }, { "epoch": 4.74, "learning_rate": 2.6307974495589137e-05, "loss": 0.8301, "step": 54250 }, { "epoch": 4.74, "learning_rate": 2.630360730194777e-05, "loss": 0.9627, "step": 54260 }, { "epoch": 4.74, "learning_rate": 2.6299240108306406e-05, "loss": 0.9107, "step": 54270 }, { "epoch": 4.74, "learning_rate": 2.6294872914665036e-05, "loss": 0.8666, "step": 54280 }, { "epoch": 4.74, "learning_rate": 2.6290505721023673e-05, "loss": 0.9129, "step": 54290 }, { "epoch": 4.74, "learning_rate": 2.6286138527382303e-05, "loss": 1.0303, "step": 54300 }, { "epoch": 4.74, "learning_rate": 2.628177133374094e-05, "loss": 0.8727, "step": 54310 }, { "epoch": 4.74, "learning_rate": 2.627740414009957e-05, "loss": 0.8728, "step": 54320 }, { "epoch": 4.75, "learning_rate": 2.6273036946458206e-05, "loss": 0.8487, "step": 54330 }, { "epoch": 4.75, "learning_rate": 2.6268669752816842e-05, "loss": 0.951, "step": 54340 }, { "epoch": 4.75, "learning_rate": 2.6264302559175475e-05, "loss": 0.9997, "step": 54350 }, { "epoch": 4.75, "learning_rate": 2.6259935365534112e-05, "loss": 0.9218, "step": 54360 }, { "epoch": 4.75, "learning_rate": 2.6255568171892742e-05, "loss": 0.8318, "step": 54370 }, { "epoch": 4.75, "learning_rate": 2.625120097825138e-05, "loss": 0.9314, "step": 54380 }, { "epoch": 4.75, "learning_rate": 2.6246833784610008e-05, "loss": 0.7847, "step": 54390 }, { "epoch": 4.75, "learning_rate": 2.6242466590968645e-05, "loss": 1.0151, "step": 54400 }, { "epoch": 4.75, "learning_rate": 2.623809939732728e-05, "loss": 0.899, "step": 54410 }, { "epoch": 4.75, "learning_rate": 2.623373220368591e-05, "loss": 0.891, "step": 54420 }, { "epoch": 4.75, "learning_rate": 2.6229365010044548e-05, "loss": 0.894, "step": 54430 }, { "epoch": 4.76, "learning_rate": 2.622499781640318e-05, "loss": 0.88, "step": 54440 }, { "epoch": 4.76, "learning_rate": 2.6220630622761814e-05, "loss": 0.8373, "step": 54450 }, { "epoch": 4.76, "learning_rate": 2.6216263429120447e-05, "loss": 0.8479, "step": 54460 }, { "epoch": 4.76, "learning_rate": 2.6211896235479084e-05, "loss": 0.847, "step": 54470 }, { "epoch": 4.76, "learning_rate": 2.6207529041837714e-05, "loss": 0.837, "step": 54480 }, { "epoch": 4.76, "learning_rate": 2.620316184819635e-05, "loss": 0.8821, "step": 54490 }, { "epoch": 4.76, "learning_rate": 2.6198794654554987e-05, "loss": 0.9593, "step": 54500 }, { "epoch": 4.76, "learning_rate": 2.6194427460913617e-05, "loss": 0.793, "step": 54510 }, { "epoch": 4.76, "learning_rate": 2.6190060267272253e-05, "loss": 0.8276, "step": 54520 }, { "epoch": 4.76, "learning_rate": 2.6185693073630883e-05, "loss": 0.8175, "step": 54530 }, { "epoch": 4.76, "learning_rate": 2.618132587998952e-05, "loss": 0.921, "step": 54540 }, { "epoch": 4.76, "learning_rate": 2.6176958686348153e-05, "loss": 0.9025, "step": 54550 }, { "epoch": 4.77, "learning_rate": 2.617259149270679e-05, "loss": 0.8719, "step": 54560 }, { "epoch": 4.77, "learning_rate": 2.616822429906542e-05, "loss": 0.8327, "step": 54570 }, { "epoch": 4.77, "learning_rate": 2.6163857105424056e-05, "loss": 0.8303, "step": 54580 }, { "epoch": 4.77, "learning_rate": 2.6159489911782693e-05, "loss": 0.7661, "step": 54590 }, { "epoch": 4.77, "learning_rate": 2.6155122718141323e-05, "loss": 0.8248, "step": 54600 }, { "epoch": 4.77, "learning_rate": 2.615075552449996e-05, "loss": 0.9543, "step": 54610 }, { "epoch": 4.77, "learning_rate": 2.614638833085859e-05, "loss": 0.8287, "step": 54620 }, { "epoch": 4.77, "learning_rate": 2.6142021137217225e-05, "loss": 0.9329, "step": 54630 }, { "epoch": 4.77, "learning_rate": 2.613765394357586e-05, "loss": 0.8607, "step": 54640 }, { "epoch": 4.77, "learning_rate": 2.6133286749934492e-05, "loss": 0.8843, "step": 54650 }, { "epoch": 4.77, "learning_rate": 2.612891955629313e-05, "loss": 0.7555, "step": 54660 }, { "epoch": 4.78, "learning_rate": 2.6124552362651762e-05, "loss": 0.9151, "step": 54670 }, { "epoch": 4.78, "learning_rate": 2.6120185169010398e-05, "loss": 0.9608, "step": 54680 }, { "epoch": 4.78, "learning_rate": 2.6115817975369028e-05, "loss": 0.8695, "step": 54690 }, { "epoch": 4.78, "learning_rate": 2.6111450781727665e-05, "loss": 0.9009, "step": 54700 }, { "epoch": 4.78, "learning_rate": 2.6107083588086295e-05, "loss": 0.994, "step": 54710 }, { "epoch": 4.78, "learning_rate": 2.610271639444493e-05, "loss": 0.8658, "step": 54720 }, { "epoch": 4.78, "learning_rate": 2.609834920080356e-05, "loss": 0.8811, "step": 54730 }, { "epoch": 4.78, "learning_rate": 2.6093982007162198e-05, "loss": 0.7785, "step": 54740 }, { "epoch": 4.78, "learning_rate": 2.6089614813520834e-05, "loss": 0.9999, "step": 54750 }, { "epoch": 4.78, "learning_rate": 2.6085247619879467e-05, "loss": 0.8271, "step": 54760 }, { "epoch": 4.78, "learning_rate": 2.6080880426238104e-05, "loss": 0.8778, "step": 54770 }, { "epoch": 4.78, "learning_rate": 2.6076513232596734e-05, "loss": 0.9008, "step": 54780 }, { "epoch": 4.79, "learning_rate": 2.607214603895537e-05, "loss": 0.7547, "step": 54790 }, { "epoch": 4.79, "learning_rate": 2.6067778845314e-05, "loss": 0.8095, "step": 54800 }, { "epoch": 4.79, "learning_rate": 2.6063411651672637e-05, "loss": 0.9216, "step": 54810 }, { "epoch": 4.79, "learning_rate": 2.6059044458031267e-05, "loss": 0.8368, "step": 54820 }, { "epoch": 4.79, "learning_rate": 2.6054677264389903e-05, "loss": 0.875, "step": 54830 }, { "epoch": 4.79, "learning_rate": 2.605031007074854e-05, "loss": 0.8361, "step": 54840 }, { "epoch": 4.79, "learning_rate": 2.604594287710717e-05, "loss": 0.7949, "step": 54850 }, { "epoch": 4.79, "learning_rate": 2.6041575683465806e-05, "loss": 0.8578, "step": 54860 }, { "epoch": 4.79, "learning_rate": 2.603720848982444e-05, "loss": 0.9148, "step": 54870 }, { "epoch": 4.79, "learning_rate": 2.6032841296183076e-05, "loss": 0.9998, "step": 54880 }, { "epoch": 4.79, "learning_rate": 2.6028474102541706e-05, "loss": 0.9097, "step": 54890 }, { "epoch": 4.8, "learning_rate": 2.6024106908900342e-05, "loss": 0.9776, "step": 54900 }, { "epoch": 4.8, "learning_rate": 2.601973971525898e-05, "loss": 0.9894, "step": 54910 }, { "epoch": 4.8, "learning_rate": 2.601537252161761e-05, "loss": 0.898, "step": 54920 }, { "epoch": 4.8, "learning_rate": 2.6011005327976245e-05, "loss": 0.8623, "step": 54930 }, { "epoch": 4.8, "learning_rate": 2.6006638134334875e-05, "loss": 0.9075, "step": 54940 }, { "epoch": 4.8, "learning_rate": 2.6002270940693512e-05, "loss": 0.8313, "step": 54950 }, { "epoch": 4.8, "learning_rate": 2.5997903747052145e-05, "loss": 0.7536, "step": 54960 }, { "epoch": 4.8, "learning_rate": 2.599353655341078e-05, "loss": 0.7828, "step": 54970 }, { "epoch": 4.8, "learning_rate": 2.598916935976941e-05, "loss": 0.9468, "step": 54980 }, { "epoch": 4.8, "learning_rate": 2.5984802166128048e-05, "loss": 0.8087, "step": 54990 }, { "epoch": 4.8, "learning_rate": 2.5980434972486685e-05, "loss": 0.8527, "step": 55000 }, { "epoch": 4.8, "eval_accuracy": 0.5853394910091383, "eval_loss": 0.8825336694717407, "eval_runtime": 84.1584, "eval_samples_per_second": 120.927, "eval_steps_per_second": 15.126, "step": 55000 }, { "epoch": 4.8, "learning_rate": 2.5976067778845314e-05, "loss": 0.8408, "step": 55010 }, { "epoch": 4.81, "learning_rate": 2.597170058520395e-05, "loss": 0.9825, "step": 55020 }, { "epoch": 4.81, "learning_rate": 2.596733339156258e-05, "loss": 0.8409, "step": 55030 }, { "epoch": 4.81, "learning_rate": 2.5962966197921217e-05, "loss": 0.8846, "step": 55040 }, { "epoch": 4.81, "learning_rate": 2.5958599004279847e-05, "loss": 0.8325, "step": 55050 }, { "epoch": 4.81, "learning_rate": 2.5954231810638484e-05, "loss": 0.94, "step": 55060 }, { "epoch": 4.81, "learning_rate": 2.594986461699712e-05, "loss": 0.8516, "step": 55070 }, { "epoch": 4.81, "learning_rate": 2.5945497423355754e-05, "loss": 0.7597, "step": 55080 }, { "epoch": 4.81, "learning_rate": 2.594113022971439e-05, "loss": 0.9395, "step": 55090 }, { "epoch": 4.81, "learning_rate": 2.593676303607302e-05, "loss": 1.0672, "step": 55100 }, { "epoch": 4.81, "learning_rate": 2.5932395842431657e-05, "loss": 1.0283, "step": 55110 }, { "epoch": 4.81, "learning_rate": 2.5928028648790286e-05, "loss": 0.8528, "step": 55120 }, { "epoch": 4.82, "learning_rate": 2.5923661455148923e-05, "loss": 0.928, "step": 55130 }, { "epoch": 4.82, "learning_rate": 2.5919294261507553e-05, "loss": 0.9047, "step": 55140 }, { "epoch": 4.82, "learning_rate": 2.591492706786619e-05, "loss": 0.8232, "step": 55150 }, { "epoch": 4.82, "learning_rate": 2.5910559874224826e-05, "loss": 0.8576, "step": 55160 }, { "epoch": 4.82, "learning_rate": 2.590619268058346e-05, "loss": 0.9195, "step": 55170 }, { "epoch": 4.82, "learning_rate": 2.5901825486942092e-05, "loss": 0.9917, "step": 55180 }, { "epoch": 4.82, "learning_rate": 2.5897458293300726e-05, "loss": 0.8896, "step": 55190 }, { "epoch": 4.82, "learning_rate": 2.5893091099659362e-05, "loss": 0.9379, "step": 55200 }, { "epoch": 4.82, "learning_rate": 2.5888723906017992e-05, "loss": 1.0526, "step": 55210 }, { "epoch": 4.82, "learning_rate": 2.588435671237663e-05, "loss": 0.8374, "step": 55220 }, { "epoch": 4.82, "learning_rate": 2.587998951873526e-05, "loss": 0.8445, "step": 55230 }, { "epoch": 4.82, "learning_rate": 2.5875622325093895e-05, "loss": 0.8224, "step": 55240 }, { "epoch": 4.83, "learning_rate": 2.587125513145253e-05, "loss": 0.9466, "step": 55250 }, { "epoch": 4.83, "learning_rate": 2.586688793781116e-05, "loss": 0.7838, "step": 55260 }, { "epoch": 4.83, "learning_rate": 2.5862520744169798e-05, "loss": 0.7645, "step": 55270 }, { "epoch": 4.83, "learning_rate": 2.585815355052843e-05, "loss": 0.8938, "step": 55280 }, { "epoch": 4.83, "learning_rate": 2.5853786356887068e-05, "loss": 0.984, "step": 55290 }, { "epoch": 4.83, "learning_rate": 2.5849419163245698e-05, "loss": 0.8483, "step": 55300 }, { "epoch": 4.83, "learning_rate": 2.5845051969604334e-05, "loss": 0.9479, "step": 55310 }, { "epoch": 4.83, "learning_rate": 2.584068477596297e-05, "loss": 0.9273, "step": 55320 }, { "epoch": 4.83, "learning_rate": 2.58363175823216e-05, "loss": 0.865, "step": 55330 }, { "epoch": 4.83, "learning_rate": 2.5831950388680237e-05, "loss": 0.7465, "step": 55340 }, { "epoch": 4.83, "learning_rate": 2.5827583195038867e-05, "loss": 0.917, "step": 55350 }, { "epoch": 4.84, "learning_rate": 2.5823216001397504e-05, "loss": 0.9078, "step": 55360 }, { "epoch": 4.84, "learning_rate": 2.5818848807756137e-05, "loss": 0.8574, "step": 55370 }, { "epoch": 4.84, "learning_rate": 2.581448161411477e-05, "loss": 0.7619, "step": 55380 }, { "epoch": 4.84, "learning_rate": 2.5810114420473403e-05, "loss": 0.9153, "step": 55390 }, { "epoch": 4.84, "learning_rate": 2.580574722683204e-05, "loss": 0.9029, "step": 55400 }, { "epoch": 4.84, "learning_rate": 2.5801380033190676e-05, "loss": 0.8079, "step": 55410 }, { "epoch": 4.84, "learning_rate": 2.5797012839549306e-05, "loss": 0.9732, "step": 55420 }, { "epoch": 4.84, "learning_rate": 2.5792645645907943e-05, "loss": 0.78, "step": 55430 }, { "epoch": 4.84, "learning_rate": 2.5788278452266573e-05, "loss": 0.81, "step": 55440 }, { "epoch": 4.84, "learning_rate": 2.578391125862521e-05, "loss": 0.8849, "step": 55450 }, { "epoch": 4.84, "learning_rate": 2.577954406498384e-05, "loss": 0.9202, "step": 55460 }, { "epoch": 4.84, "learning_rate": 2.5775176871342476e-05, "loss": 0.8827, "step": 55470 }, { "epoch": 4.85, "learning_rate": 2.5770809677701112e-05, "loss": 0.936, "step": 55480 }, { "epoch": 4.85, "learning_rate": 2.5766442484059745e-05, "loss": 0.8547, "step": 55490 }, { "epoch": 4.85, "learning_rate": 2.5762075290418382e-05, "loss": 0.8695, "step": 55500 }, { "epoch": 4.85, "learning_rate": 2.5757708096777012e-05, "loss": 0.834, "step": 55510 }, { "epoch": 4.85, "learning_rate": 2.575334090313565e-05, "loss": 0.9055, "step": 55520 }, { "epoch": 4.85, "learning_rate": 2.5748973709494278e-05, "loss": 0.8538, "step": 55530 }, { "epoch": 4.85, "learning_rate": 2.5744606515852915e-05, "loss": 0.897, "step": 55540 }, { "epoch": 4.85, "learning_rate": 2.5740239322211545e-05, "loss": 0.8901, "step": 55550 }, { "epoch": 4.85, "learning_rate": 2.573587212857018e-05, "loss": 0.8723, "step": 55560 }, { "epoch": 4.85, "learning_rate": 2.5731504934928818e-05, "loss": 0.9328, "step": 55570 }, { "epoch": 4.85, "learning_rate": 2.572713774128745e-05, "loss": 0.8962, "step": 55580 }, { "epoch": 4.86, "learning_rate": 2.5722770547646084e-05, "loss": 0.9669, "step": 55590 }, { "epoch": 4.86, "learning_rate": 2.5718403354004717e-05, "loss": 0.8496, "step": 55600 }, { "epoch": 4.86, "learning_rate": 2.5714036160363354e-05, "loss": 0.8956, "step": 55610 }, { "epoch": 4.86, "learning_rate": 2.5709668966721984e-05, "loss": 0.9863, "step": 55620 }, { "epoch": 4.86, "learning_rate": 2.570530177308062e-05, "loss": 0.9361, "step": 55630 }, { "epoch": 4.86, "learning_rate": 2.570093457943925e-05, "loss": 0.9778, "step": 55640 }, { "epoch": 4.86, "learning_rate": 2.5696567385797887e-05, "loss": 0.9162, "step": 55650 }, { "epoch": 4.86, "learning_rate": 2.5692200192156523e-05, "loss": 0.9669, "step": 55660 }, { "epoch": 4.86, "learning_rate": 2.5687832998515153e-05, "loss": 0.8291, "step": 55670 }, { "epoch": 4.86, "learning_rate": 2.568346580487379e-05, "loss": 0.8723, "step": 55680 }, { "epoch": 4.86, "learning_rate": 2.5679098611232423e-05, "loss": 0.9486, "step": 55690 }, { "epoch": 4.87, "learning_rate": 2.567473141759106e-05, "loss": 0.8915, "step": 55700 }, { "epoch": 4.87, "learning_rate": 2.567036422394969e-05, "loss": 0.8979, "step": 55710 }, { "epoch": 4.87, "learning_rate": 2.5665997030308326e-05, "loss": 0.9399, "step": 55720 }, { "epoch": 4.87, "learning_rate": 2.5661629836666963e-05, "loss": 0.9272, "step": 55730 }, { "epoch": 4.87, "learning_rate": 2.5657262643025592e-05, "loss": 0.7882, "step": 55740 }, { "epoch": 4.87, "learning_rate": 2.565289544938423e-05, "loss": 0.8961, "step": 55750 }, { "epoch": 4.87, "learning_rate": 2.564852825574286e-05, "loss": 0.8455, "step": 55760 }, { "epoch": 4.87, "learning_rate": 2.5644161062101495e-05, "loss": 0.7951, "step": 55770 }, { "epoch": 4.87, "learning_rate": 2.563979386846013e-05, "loss": 0.9221, "step": 55780 }, { "epoch": 4.87, "learning_rate": 2.5635426674818762e-05, "loss": 0.8515, "step": 55790 }, { "epoch": 4.87, "learning_rate": 2.5631059481177395e-05, "loss": 0.8865, "step": 55800 }, { "epoch": 4.87, "learning_rate": 2.562669228753603e-05, "loss": 0.8225, "step": 55810 }, { "epoch": 4.88, "learning_rate": 2.5622325093894668e-05, "loss": 0.8594, "step": 55820 }, { "epoch": 4.88, "learning_rate": 2.5617957900253298e-05, "loss": 0.901, "step": 55830 }, { "epoch": 4.88, "learning_rate": 2.5613590706611935e-05, "loss": 0.9044, "step": 55840 }, { "epoch": 4.88, "learning_rate": 2.5609223512970564e-05, "loss": 0.9934, "step": 55850 }, { "epoch": 4.88, "learning_rate": 2.56048563193292e-05, "loss": 0.8792, "step": 55860 }, { "epoch": 4.88, "learning_rate": 2.560048912568783e-05, "loss": 0.9271, "step": 55870 }, { "epoch": 4.88, "learning_rate": 2.5596121932046467e-05, "loss": 0.862, "step": 55880 }, { "epoch": 4.88, "learning_rate": 2.5591754738405104e-05, "loss": 0.8689, "step": 55890 }, { "epoch": 4.88, "learning_rate": 2.5587387544763737e-05, "loss": 0.8655, "step": 55900 }, { "epoch": 4.88, "learning_rate": 2.5583020351122374e-05, "loss": 0.9845, "step": 55910 }, { "epoch": 4.88, "learning_rate": 2.5578653157481004e-05, "loss": 0.8607, "step": 55920 }, { "epoch": 4.89, "learning_rate": 2.557428596383964e-05, "loss": 0.8727, "step": 55930 }, { "epoch": 4.89, "learning_rate": 2.556991877019827e-05, "loss": 0.8137, "step": 55940 }, { "epoch": 4.89, "learning_rate": 2.5565551576556907e-05, "loss": 0.7839, "step": 55950 }, { "epoch": 4.89, "learning_rate": 2.5561184382915536e-05, "loss": 0.8363, "step": 55960 }, { "epoch": 4.89, "learning_rate": 2.5556817189274173e-05, "loss": 0.884, "step": 55970 }, { "epoch": 4.89, "learning_rate": 2.555244999563281e-05, "loss": 0.986, "step": 55980 }, { "epoch": 4.89, "learning_rate": 2.554808280199144e-05, "loss": 0.9333, "step": 55990 }, { "epoch": 4.89, "learning_rate": 2.5543715608350076e-05, "loss": 0.9797, "step": 56000 }, { "epoch": 4.89, "learning_rate": 2.553934841470871e-05, "loss": 0.8889, "step": 56010 }, { "epoch": 4.89, "learning_rate": 2.5534981221067346e-05, "loss": 0.8699, "step": 56020 }, { "epoch": 4.89, "learning_rate": 2.5530614027425976e-05, "loss": 0.8911, "step": 56030 }, { "epoch": 4.89, "learning_rate": 2.5526246833784612e-05, "loss": 0.9085, "step": 56040 }, { "epoch": 4.9, "learning_rate": 2.5521879640143242e-05, "loss": 0.9573, "step": 56050 }, { "epoch": 4.9, "learning_rate": 2.551751244650188e-05, "loss": 0.8431, "step": 56060 }, { "epoch": 4.9, "learning_rate": 2.5513145252860515e-05, "loss": 0.8312, "step": 56070 }, { "epoch": 4.9, "learning_rate": 2.5508778059219145e-05, "loss": 0.9326, "step": 56080 }, { "epoch": 4.9, "learning_rate": 2.550441086557778e-05, "loss": 0.8839, "step": 56090 }, { "epoch": 4.9, "learning_rate": 2.5500043671936415e-05, "loss": 0.9524, "step": 56100 }, { "epoch": 4.9, "learning_rate": 2.549567647829505e-05, "loss": 0.8407, "step": 56110 }, { "epoch": 4.9, "learning_rate": 2.549130928465368e-05, "loss": 0.8602, "step": 56120 }, { "epoch": 4.9, "learning_rate": 2.5486942091012318e-05, "loss": 0.9407, "step": 56130 }, { "epoch": 4.9, "learning_rate": 2.5482574897370954e-05, "loss": 0.8559, "step": 56140 }, { "epoch": 4.9, "learning_rate": 2.5478207703729584e-05, "loss": 0.8948, "step": 56150 }, { "epoch": 4.91, "learning_rate": 2.547384051008822e-05, "loss": 0.8248, "step": 56160 }, { "epoch": 4.91, "learning_rate": 2.546947331644685e-05, "loss": 0.9013, "step": 56170 }, { "epoch": 4.91, "learning_rate": 2.5465106122805487e-05, "loss": 0.9455, "step": 56180 }, { "epoch": 4.91, "learning_rate": 2.5460738929164117e-05, "loss": 0.8858, "step": 56190 }, { "epoch": 4.91, "learning_rate": 2.5456371735522754e-05, "loss": 0.9458, "step": 56200 }, { "epoch": 4.91, "learning_rate": 2.5452004541881387e-05, "loss": 0.9003, "step": 56210 }, { "epoch": 4.91, "learning_rate": 2.5447637348240023e-05, "loss": 0.9027, "step": 56220 }, { "epoch": 4.91, "learning_rate": 2.544327015459866e-05, "loss": 0.9539, "step": 56230 }, { "epoch": 4.91, "learning_rate": 2.543890296095729e-05, "loss": 0.8294, "step": 56240 }, { "epoch": 4.91, "learning_rate": 2.5434535767315926e-05, "loss": 0.96, "step": 56250 }, { "epoch": 4.91, "learning_rate": 2.5430168573674556e-05, "loss": 0.8781, "step": 56260 }, { "epoch": 4.91, "learning_rate": 2.5425801380033193e-05, "loss": 0.8627, "step": 56270 }, { "epoch": 4.92, "learning_rate": 2.5421434186391823e-05, "loss": 0.8843, "step": 56280 }, { "epoch": 4.92, "learning_rate": 2.541706699275046e-05, "loss": 0.8541, "step": 56290 }, { "epoch": 4.92, "learning_rate": 2.5412699799109096e-05, "loss": 0.9253, "step": 56300 }, { "epoch": 4.92, "learning_rate": 2.540833260546773e-05, "loss": 0.9345, "step": 56310 }, { "epoch": 4.92, "learning_rate": 2.5403965411826362e-05, "loss": 0.868, "step": 56320 }, { "epoch": 4.92, "learning_rate": 2.5399598218184995e-05, "loss": 0.906, "step": 56330 }, { "epoch": 4.92, "learning_rate": 2.5395231024543632e-05, "loss": 0.885, "step": 56340 }, { "epoch": 4.92, "learning_rate": 2.5390863830902262e-05, "loss": 0.8239, "step": 56350 }, { "epoch": 4.92, "learning_rate": 2.53864966372609e-05, "loss": 0.8022, "step": 56360 }, { "epoch": 4.92, "learning_rate": 2.5382129443619528e-05, "loss": 0.7967, "step": 56370 }, { "epoch": 4.92, "learning_rate": 2.5377762249978165e-05, "loss": 0.7712, "step": 56380 }, { "epoch": 4.93, "learning_rate": 2.53733950563368e-05, "loss": 0.7896, "step": 56390 }, { "epoch": 4.93, "learning_rate": 2.536902786269543e-05, "loss": 0.8808, "step": 56400 }, { "epoch": 4.93, "learning_rate": 2.5364660669054068e-05, "loss": 1.0044, "step": 56410 }, { "epoch": 4.93, "learning_rate": 2.53602934754127e-05, "loss": 0.8639, "step": 56420 }, { "epoch": 4.93, "learning_rate": 2.5355926281771338e-05, "loss": 0.8516, "step": 56430 }, { "epoch": 4.93, "learning_rate": 2.5351559088129967e-05, "loss": 0.9889, "step": 56440 }, { "epoch": 4.93, "learning_rate": 2.5347191894488604e-05, "loss": 0.7751, "step": 56450 }, { "epoch": 4.93, "learning_rate": 2.5342824700847234e-05, "loss": 0.8926, "step": 56460 }, { "epoch": 4.93, "learning_rate": 2.533845750720587e-05, "loss": 0.8657, "step": 56470 }, { "epoch": 4.93, "learning_rate": 2.5334090313564507e-05, "loss": 0.8494, "step": 56480 }, { "epoch": 4.93, "learning_rate": 2.5329723119923137e-05, "loss": 0.8189, "step": 56490 }, { "epoch": 4.93, "learning_rate": 2.5325355926281773e-05, "loss": 0.9152, "step": 56500 }, { "epoch": 4.94, "learning_rate": 2.5320988732640407e-05, "loss": 0.7973, "step": 56510 }, { "epoch": 4.94, "learning_rate": 2.531662153899904e-05, "loss": 0.9564, "step": 56520 }, { "epoch": 4.94, "learning_rate": 2.5312254345357673e-05, "loss": 0.9609, "step": 56530 }, { "epoch": 4.94, "learning_rate": 2.530788715171631e-05, "loss": 0.7874, "step": 56540 }, { "epoch": 4.94, "learning_rate": 2.5303519958074946e-05, "loss": 0.7814, "step": 56550 }, { "epoch": 4.94, "learning_rate": 2.5299152764433576e-05, "loss": 0.7896, "step": 56560 }, { "epoch": 4.94, "learning_rate": 2.5294785570792213e-05, "loss": 0.8756, "step": 56570 }, { "epoch": 4.94, "learning_rate": 2.5290418377150842e-05, "loss": 0.8362, "step": 56580 }, { "epoch": 4.94, "learning_rate": 2.528605118350948e-05, "loss": 0.8518, "step": 56590 }, { "epoch": 4.94, "learning_rate": 2.528168398986811e-05, "loss": 1.0048, "step": 56600 }, { "epoch": 4.94, "learning_rate": 2.5277316796226745e-05, "loss": 0.801, "step": 56610 }, { "epoch": 4.95, "learning_rate": 2.527294960258538e-05, "loss": 0.8879, "step": 56620 }, { "epoch": 4.95, "learning_rate": 2.5268582408944015e-05, "loss": 0.9172, "step": 56630 }, { "epoch": 4.95, "learning_rate": 2.5264215215302652e-05, "loss": 0.8005, "step": 56640 }, { "epoch": 4.95, "learning_rate": 2.525984802166128e-05, "loss": 0.8561, "step": 56650 }, { "epoch": 4.95, "learning_rate": 2.5255480828019918e-05, "loss": 0.8505, "step": 56660 }, { "epoch": 4.95, "learning_rate": 2.5251113634378548e-05, "loss": 0.9463, "step": 56670 }, { "epoch": 4.95, "learning_rate": 2.5246746440737185e-05, "loss": 0.8691, "step": 56680 }, { "epoch": 4.95, "learning_rate": 2.5242379247095814e-05, "loss": 0.8023, "step": 56690 }, { "epoch": 4.95, "learning_rate": 2.523801205345445e-05, "loss": 0.8401, "step": 56700 }, { "epoch": 4.95, "learning_rate": 2.5233644859813084e-05, "loss": 0.842, "step": 56710 }, { "epoch": 4.95, "learning_rate": 2.5229277666171717e-05, "loss": 0.8461, "step": 56720 }, { "epoch": 4.96, "learning_rate": 2.5224910472530354e-05, "loss": 0.9341, "step": 56730 }, { "epoch": 4.96, "learning_rate": 2.5220543278888987e-05, "loss": 0.7949, "step": 56740 }, { "epoch": 4.96, "learning_rate": 2.5216176085247624e-05, "loss": 0.8657, "step": 56750 }, { "epoch": 4.96, "learning_rate": 2.5211808891606254e-05, "loss": 0.8273, "step": 56760 }, { "epoch": 4.96, "learning_rate": 2.520744169796489e-05, "loss": 0.9516, "step": 56770 }, { "epoch": 4.96, "learning_rate": 2.520307450432352e-05, "loss": 0.7918, "step": 56780 }, { "epoch": 4.96, "learning_rate": 2.5198707310682157e-05, "loss": 0.7226, "step": 56790 }, { "epoch": 4.96, "learning_rate": 2.5194340117040793e-05, "loss": 0.9836, "step": 56800 }, { "epoch": 4.96, "learning_rate": 2.5189972923399423e-05, "loss": 0.8727, "step": 56810 }, { "epoch": 4.96, "learning_rate": 2.518560572975806e-05, "loss": 1.0078, "step": 56820 }, { "epoch": 4.96, "learning_rate": 2.5181238536116693e-05, "loss": 0.8891, "step": 56830 }, { "epoch": 4.96, "learning_rate": 2.517687134247533e-05, "loss": 0.7374, "step": 56840 }, { "epoch": 4.97, "learning_rate": 2.517250414883396e-05, "loss": 0.866, "step": 56850 }, { "epoch": 4.97, "learning_rate": 2.5168136955192596e-05, "loss": 0.7941, "step": 56860 }, { "epoch": 4.97, "learning_rate": 2.5163769761551226e-05, "loss": 0.9144, "step": 56870 }, { "epoch": 4.97, "learning_rate": 2.5159402567909862e-05, "loss": 0.9529, "step": 56880 }, { "epoch": 4.97, "learning_rate": 2.51550353742685e-05, "loss": 0.8415, "step": 56890 }, { "epoch": 4.97, "learning_rate": 2.515066818062713e-05, "loss": 0.9228, "step": 56900 }, { "epoch": 4.97, "learning_rate": 2.5146300986985765e-05, "loss": 0.987, "step": 56910 }, { "epoch": 4.97, "learning_rate": 2.5141933793344395e-05, "loss": 1.0092, "step": 56920 }, { "epoch": 4.97, "learning_rate": 2.513756659970303e-05, "loss": 0.7489, "step": 56930 }, { "epoch": 4.97, "learning_rate": 2.5133199406061665e-05, "loss": 0.89, "step": 56940 }, { "epoch": 4.97, "learning_rate": 2.51288322124203e-05, "loss": 1.0115, "step": 56950 }, { "epoch": 4.98, "learning_rate": 2.5124465018778938e-05, "loss": 0.7725, "step": 56960 }, { "epoch": 4.98, "learning_rate": 2.5120097825137568e-05, "loss": 0.9675, "step": 56970 }, { "epoch": 4.98, "learning_rate": 2.5115730631496204e-05, "loss": 0.7925, "step": 56980 }, { "epoch": 4.98, "learning_rate": 2.5111363437854834e-05, "loss": 0.8648, "step": 56990 }, { "epoch": 4.98, "learning_rate": 2.510699624421347e-05, "loss": 0.9064, "step": 57000 }, { "epoch": 4.98, "learning_rate": 2.51026290505721e-05, "loss": 0.9294, "step": 57010 }, { "epoch": 4.98, "learning_rate": 2.5098261856930737e-05, "loss": 0.9455, "step": 57020 }, { "epoch": 4.98, "learning_rate": 2.509389466328937e-05, "loss": 0.8114, "step": 57030 }, { "epoch": 4.98, "learning_rate": 2.5089527469648007e-05, "loss": 0.9574, "step": 57040 }, { "epoch": 4.98, "learning_rate": 2.508516027600664e-05, "loss": 0.8764, "step": 57050 }, { "epoch": 4.98, "learning_rate": 2.5080793082365273e-05, "loss": 0.8618, "step": 57060 }, { "epoch": 4.98, "learning_rate": 2.507642588872391e-05, "loss": 0.8172, "step": 57070 }, { "epoch": 4.99, "learning_rate": 2.507205869508254e-05, "loss": 0.8192, "step": 57080 }, { "epoch": 4.99, "learning_rate": 2.5067691501441176e-05, "loss": 0.8546, "step": 57090 }, { "epoch": 4.99, "learning_rate": 2.5063324307799806e-05, "loss": 0.9539, "step": 57100 }, { "epoch": 4.99, "learning_rate": 2.5058957114158443e-05, "loss": 0.7779, "step": 57110 }, { "epoch": 4.99, "learning_rate": 2.5054589920517073e-05, "loss": 0.8665, "step": 57120 }, { "epoch": 4.99, "learning_rate": 2.505022272687571e-05, "loss": 0.8312, "step": 57130 }, { "epoch": 4.99, "learning_rate": 2.5045855533234346e-05, "loss": 0.8917, "step": 57140 }, { "epoch": 4.99, "learning_rate": 2.504148833959298e-05, "loss": 0.7547, "step": 57150 }, { "epoch": 4.99, "learning_rate": 2.5037121145951616e-05, "loss": 0.8672, "step": 57160 }, { "epoch": 4.99, "learning_rate": 2.5032753952310245e-05, "loss": 0.8385, "step": 57170 }, { "epoch": 4.99, "learning_rate": 2.5028386758668882e-05, "loss": 0.8767, "step": 57180 }, { "epoch": 5.0, "learning_rate": 2.5024019565027512e-05, "loss": 0.8385, "step": 57190 }, { "epoch": 5.0, "learning_rate": 2.501965237138615e-05, "loss": 0.7754, "step": 57200 }, { "epoch": 5.0, "learning_rate": 2.5015285177744785e-05, "loss": 0.7636, "step": 57210 }, { "epoch": 5.0, "learning_rate": 2.5010917984103415e-05, "loss": 0.8815, "step": 57220 }, { "epoch": 5.0, "learning_rate": 2.500655079046205e-05, "loss": 0.879, "step": 57230 }, { "epoch": 5.0, "learning_rate": 2.5002183596820685e-05, "loss": 0.921, "step": 57240 }, { "epoch": 5.0, "learning_rate": 2.4997816403179318e-05, "loss": 0.8136, "step": 57250 }, { "epoch": 5.0, "learning_rate": 2.4993449209537954e-05, "loss": 0.878, "step": 57260 }, { "epoch": 5.0, "learning_rate": 2.4989082015896588e-05, "loss": 0.9167, "step": 57270 }, { "epoch": 5.0, "learning_rate": 2.498471482225522e-05, "loss": 1.0372, "step": 57280 }, { "epoch": 5.0, "learning_rate": 2.4980347628613854e-05, "loss": 1.0147, "step": 57290 }, { "epoch": 5.0, "learning_rate": 2.4975980434972487e-05, "loss": 0.8743, "step": 57300 }, { "epoch": 5.01, "learning_rate": 2.497161324133112e-05, "loss": 0.8161, "step": 57310 }, { "epoch": 5.01, "learning_rate": 2.4967246047689754e-05, "loss": 0.8215, "step": 57320 }, { "epoch": 5.01, "learning_rate": 2.496287885404839e-05, "loss": 0.8843, "step": 57330 }, { "epoch": 5.01, "learning_rate": 2.4958511660407023e-05, "loss": 0.8404, "step": 57340 }, { "epoch": 5.01, "learning_rate": 2.4954144466765657e-05, "loss": 0.8752, "step": 57350 }, { "epoch": 5.01, "learning_rate": 2.4949777273124293e-05, "loss": 1.0193, "step": 57360 }, { "epoch": 5.01, "learning_rate": 2.4945410079482926e-05, "loss": 0.8479, "step": 57370 }, { "epoch": 5.01, "learning_rate": 2.494104288584156e-05, "loss": 0.7574, "step": 57380 }, { "epoch": 5.01, "learning_rate": 2.4936675692200193e-05, "loss": 0.8659, "step": 57390 }, { "epoch": 5.01, "learning_rate": 2.4932308498558826e-05, "loss": 0.904, "step": 57400 }, { "epoch": 5.01, "learning_rate": 2.4927941304917463e-05, "loss": 0.9663, "step": 57410 }, { "epoch": 5.02, "learning_rate": 2.4923574111276096e-05, "loss": 0.8758, "step": 57420 }, { "epoch": 5.02, "learning_rate": 2.491920691763473e-05, "loss": 0.7625, "step": 57430 }, { "epoch": 5.02, "learning_rate": 2.4914839723993362e-05, "loss": 0.9155, "step": 57440 }, { "epoch": 5.02, "learning_rate": 2.4910472530351995e-05, "loss": 0.8281, "step": 57450 }, { "epoch": 5.02, "learning_rate": 2.4906105336710632e-05, "loss": 1.0054, "step": 57460 }, { "epoch": 5.02, "learning_rate": 2.4901738143069265e-05, "loss": 0.9516, "step": 57470 }, { "epoch": 5.02, "learning_rate": 2.48973709494279e-05, "loss": 0.8884, "step": 57480 }, { "epoch": 5.02, "learning_rate": 2.489300375578653e-05, "loss": 0.9585, "step": 57490 }, { "epoch": 5.02, "learning_rate": 2.4888636562145168e-05, "loss": 0.9449, "step": 57500 }, { "epoch": 5.02, "learning_rate": 2.48842693685038e-05, "loss": 0.8379, "step": 57510 }, { "epoch": 5.02, "learning_rate": 2.4879902174862435e-05, "loss": 0.9263, "step": 57520 }, { "epoch": 5.02, "learning_rate": 2.4875534981221068e-05, "loss": 0.9118, "step": 57530 }, { "epoch": 5.03, "learning_rate": 2.48711677875797e-05, "loss": 1.0033, "step": 57540 }, { "epoch": 5.03, "learning_rate": 2.4866800593938334e-05, "loss": 0.8928, "step": 57550 }, { "epoch": 5.03, "learning_rate": 2.486243340029697e-05, "loss": 0.9608, "step": 57560 }, { "epoch": 5.03, "learning_rate": 2.4858066206655604e-05, "loss": 0.8171, "step": 57570 }, { "epoch": 5.03, "learning_rate": 2.485369901301424e-05, "loss": 0.8898, "step": 57580 }, { "epoch": 5.03, "learning_rate": 2.4849331819372874e-05, "loss": 0.8329, "step": 57590 }, { "epoch": 5.03, "learning_rate": 2.4844964625731507e-05, "loss": 0.9652, "step": 57600 }, { "epoch": 5.03, "learning_rate": 2.484059743209014e-05, "loss": 0.9223, "step": 57610 }, { "epoch": 5.03, "learning_rate": 2.4836230238448773e-05, "loss": 0.87, "step": 57620 }, { "epoch": 5.03, "learning_rate": 2.4831863044807407e-05, "loss": 0.7914, "step": 57630 }, { "epoch": 5.03, "learning_rate": 2.482749585116604e-05, "loss": 0.8943, "step": 57640 }, { "epoch": 5.04, "learning_rate": 2.4823128657524673e-05, "loss": 0.9384, "step": 57650 }, { "epoch": 5.04, "learning_rate": 2.481876146388331e-05, "loss": 0.9316, "step": 57660 }, { "epoch": 5.04, "learning_rate": 2.4814394270241946e-05, "loss": 0.8601, "step": 57670 }, { "epoch": 5.04, "learning_rate": 2.481002707660058e-05, "loss": 0.7981, "step": 57680 }, { "epoch": 5.04, "learning_rate": 2.4805659882959213e-05, "loss": 0.8552, "step": 57690 }, { "epoch": 5.04, "learning_rate": 2.4801292689317846e-05, "loss": 0.9635, "step": 57700 }, { "epoch": 5.04, "learning_rate": 2.479692549567648e-05, "loss": 0.8065, "step": 57710 }, { "epoch": 5.04, "learning_rate": 2.4792558302035112e-05, "loss": 0.87, "step": 57720 }, { "epoch": 5.04, "learning_rate": 2.4788191108393745e-05, "loss": 0.8571, "step": 57730 }, { "epoch": 5.04, "learning_rate": 2.4783823914752382e-05, "loss": 0.9128, "step": 57740 }, { "epoch": 5.04, "learning_rate": 2.4779456721111015e-05, "loss": 0.8222, "step": 57750 }, { "epoch": 5.04, "learning_rate": 2.477508952746965e-05, "loss": 0.7639, "step": 57760 }, { "epoch": 5.05, "learning_rate": 2.4770722333828285e-05, "loss": 0.8491, "step": 57770 }, { "epoch": 5.05, "learning_rate": 2.4766355140186918e-05, "loss": 0.9271, "step": 57780 }, { "epoch": 5.05, "learning_rate": 2.476198794654555e-05, "loss": 0.8079, "step": 57790 }, { "epoch": 5.05, "learning_rate": 2.4757620752904185e-05, "loss": 0.8767, "step": 57800 }, { "epoch": 5.05, "learning_rate": 2.4753253559262818e-05, "loss": 0.9217, "step": 57810 }, { "epoch": 5.05, "learning_rate": 2.474888636562145e-05, "loss": 0.6838, "step": 57820 }, { "epoch": 5.05, "learning_rate": 2.4744519171980088e-05, "loss": 0.9121, "step": 57830 }, { "epoch": 5.05, "learning_rate": 2.474015197833872e-05, "loss": 0.8244, "step": 57840 }, { "epoch": 5.05, "learning_rate": 2.4735784784697354e-05, "loss": 0.8861, "step": 57850 }, { "epoch": 5.05, "learning_rate": 2.4731417591055987e-05, "loss": 0.9895, "step": 57860 }, { "epoch": 5.05, "learning_rate": 2.4727050397414624e-05, "loss": 0.8631, "step": 57870 }, { "epoch": 5.06, "learning_rate": 2.4722683203773257e-05, "loss": 0.7197, "step": 57880 }, { "epoch": 5.06, "learning_rate": 2.471831601013189e-05, "loss": 0.8961, "step": 57890 }, { "epoch": 5.06, "learning_rate": 2.4713948816490523e-05, "loss": 0.822, "step": 57900 }, { "epoch": 5.06, "learning_rate": 2.470958162284916e-05, "loss": 0.8713, "step": 57910 }, { "epoch": 5.06, "learning_rate": 2.4705214429207793e-05, "loss": 1.09, "step": 57920 }, { "epoch": 5.06, "learning_rate": 2.4700847235566426e-05, "loss": 0.9035, "step": 57930 }, { "epoch": 5.06, "learning_rate": 2.469648004192506e-05, "loss": 0.8497, "step": 57940 }, { "epoch": 5.06, "learning_rate": 2.4692112848283693e-05, "loss": 0.8653, "step": 57950 }, { "epoch": 5.06, "learning_rate": 2.4687745654642326e-05, "loss": 1.0199, "step": 57960 }, { "epoch": 5.06, "learning_rate": 2.4683378461000963e-05, "loss": 0.8263, "step": 57970 }, { "epoch": 5.06, "learning_rate": 2.4679011267359596e-05, "loss": 0.8627, "step": 57980 }, { "epoch": 5.07, "learning_rate": 2.4674644073718232e-05, "loss": 0.8682, "step": 57990 }, { "epoch": 5.07, "learning_rate": 2.4670276880076866e-05, "loss": 0.9534, "step": 58000 }, { "epoch": 5.07, "learning_rate": 2.46659096864355e-05, "loss": 0.8675, "step": 58010 }, { "epoch": 5.07, "learning_rate": 2.4661542492794132e-05, "loss": 0.7888, "step": 58020 }, { "epoch": 5.07, "learning_rate": 2.4657175299152765e-05, "loss": 0.8924, "step": 58030 }, { "epoch": 5.07, "learning_rate": 2.46528081055114e-05, "loss": 0.8404, "step": 58040 }, { "epoch": 5.07, "learning_rate": 2.464844091187003e-05, "loss": 0.9269, "step": 58050 }, { "epoch": 5.07, "learning_rate": 2.4644073718228665e-05, "loss": 0.9535, "step": 58060 }, { "epoch": 5.07, "learning_rate": 2.46397065245873e-05, "loss": 0.8437, "step": 58070 }, { "epoch": 5.07, "learning_rate": 2.4635339330945935e-05, "loss": 0.8265, "step": 58080 }, { "epoch": 5.07, "learning_rate": 2.463097213730457e-05, "loss": 0.8622, "step": 58090 }, { "epoch": 5.07, "learning_rate": 2.4626604943663204e-05, "loss": 0.8962, "step": 58100 }, { "epoch": 5.08, "learning_rate": 2.4622237750021838e-05, "loss": 0.7798, "step": 58110 }, { "epoch": 5.08, "learning_rate": 2.461787055638047e-05, "loss": 0.9141, "step": 58120 }, { "epoch": 5.08, "learning_rate": 2.4613503362739104e-05, "loss": 0.8977, "step": 58130 }, { "epoch": 5.08, "learning_rate": 2.4609136169097737e-05, "loss": 0.8746, "step": 58140 }, { "epoch": 5.08, "learning_rate": 2.4604768975456374e-05, "loss": 0.8839, "step": 58150 }, { "epoch": 5.08, "learning_rate": 2.4600401781815007e-05, "loss": 0.8968, "step": 58160 }, { "epoch": 5.08, "learning_rate": 2.459603458817364e-05, "loss": 0.949, "step": 58170 }, { "epoch": 5.08, "learning_rate": 2.4591667394532273e-05, "loss": 0.9374, "step": 58180 }, { "epoch": 5.08, "learning_rate": 2.458730020089091e-05, "loss": 0.7251, "step": 58190 }, { "epoch": 5.08, "learning_rate": 2.4582933007249543e-05, "loss": 0.9032, "step": 58200 }, { "epoch": 5.08, "learning_rate": 2.4578565813608176e-05, "loss": 0.8718, "step": 58210 }, { "epoch": 5.09, "learning_rate": 2.457419861996681e-05, "loss": 0.7983, "step": 58220 }, { "epoch": 5.09, "learning_rate": 2.4569831426325443e-05, "loss": 0.6961, "step": 58230 }, { "epoch": 5.09, "learning_rate": 2.456546423268408e-05, "loss": 0.8175, "step": 58240 }, { "epoch": 5.09, "learning_rate": 2.4561097039042713e-05, "loss": 1.1069, "step": 58250 }, { "epoch": 5.09, "learning_rate": 2.4556729845401346e-05, "loss": 0.9182, "step": 58260 }, { "epoch": 5.09, "learning_rate": 2.455236265175998e-05, "loss": 0.8557, "step": 58270 }, { "epoch": 5.09, "learning_rate": 2.4547995458118612e-05, "loss": 0.8582, "step": 58280 }, { "epoch": 5.09, "learning_rate": 2.454362826447725e-05, "loss": 0.8022, "step": 58290 }, { "epoch": 5.09, "learning_rate": 2.4539261070835882e-05, "loss": 0.9817, "step": 58300 }, { "epoch": 5.09, "learning_rate": 2.4534893877194515e-05, "loss": 0.8536, "step": 58310 }, { "epoch": 5.09, "learning_rate": 2.4530526683553152e-05, "loss": 0.8165, "step": 58320 }, { "epoch": 5.09, "learning_rate": 2.4526159489911785e-05, "loss": 0.8768, "step": 58330 }, { "epoch": 5.1, "learning_rate": 2.4521792296270418e-05, "loss": 0.8579, "step": 58340 }, { "epoch": 5.1, "learning_rate": 2.451742510262905e-05, "loss": 0.9028, "step": 58350 }, { "epoch": 5.1, "learning_rate": 2.4513057908987685e-05, "loss": 0.9592, "step": 58360 }, { "epoch": 5.1, "learning_rate": 2.4508690715346318e-05, "loss": 0.9952, "step": 58370 }, { "epoch": 5.1, "learning_rate": 2.450432352170495e-05, "loss": 0.8766, "step": 58380 }, { "epoch": 5.1, "learning_rate": 2.4499956328063588e-05, "loss": 0.8449, "step": 58390 }, { "epoch": 5.1, "learning_rate": 2.4495589134422224e-05, "loss": 0.8694, "step": 58400 }, { "epoch": 5.1, "learning_rate": 2.4491221940780857e-05, "loss": 0.9465, "step": 58410 }, { "epoch": 5.1, "learning_rate": 2.448685474713949e-05, "loss": 0.973, "step": 58420 }, { "epoch": 5.1, "learning_rate": 2.4482487553498124e-05, "loss": 0.8539, "step": 58430 }, { "epoch": 5.1, "learning_rate": 2.4478120359856757e-05, "loss": 0.871, "step": 58440 }, { "epoch": 5.11, "learning_rate": 2.447375316621539e-05, "loss": 0.8417, "step": 58450 }, { "epoch": 5.11, "learning_rate": 2.4469385972574023e-05, "loss": 0.9991, "step": 58460 }, { "epoch": 5.11, "learning_rate": 2.4465018778932657e-05, "loss": 0.8741, "step": 58470 }, { "epoch": 5.11, "learning_rate": 2.4460651585291293e-05, "loss": 0.9823, "step": 58480 }, { "epoch": 5.11, "learning_rate": 2.4456284391649926e-05, "loss": 0.9006, "step": 58490 }, { "epoch": 5.11, "learning_rate": 2.4451917198008563e-05, "loss": 0.8269, "step": 58500 }, { "epoch": 5.11, "learning_rate": 2.4447550004367196e-05, "loss": 0.8346, "step": 58510 }, { "epoch": 5.11, "learning_rate": 2.444318281072583e-05, "loss": 0.9089, "step": 58520 }, { "epoch": 5.11, "learning_rate": 2.4438815617084463e-05, "loss": 0.927, "step": 58530 }, { "epoch": 5.11, "learning_rate": 2.4434448423443096e-05, "loss": 0.9567, "step": 58540 }, { "epoch": 5.11, "learning_rate": 2.443008122980173e-05, "loss": 0.8977, "step": 58550 }, { "epoch": 5.11, "learning_rate": 2.4425714036160362e-05, "loss": 1.0314, "step": 58560 }, { "epoch": 5.12, "learning_rate": 2.4421346842519e-05, "loss": 0.8871, "step": 58570 }, { "epoch": 5.12, "learning_rate": 2.4416979648877632e-05, "loss": 0.8301, "step": 58580 }, { "epoch": 5.12, "learning_rate": 2.4412612455236265e-05, "loss": 0.9333, "step": 58590 }, { "epoch": 5.12, "learning_rate": 2.4408245261594902e-05, "loss": 0.8679, "step": 58600 }, { "epoch": 5.12, "learning_rate": 2.4403878067953535e-05, "loss": 0.8118, "step": 58610 }, { "epoch": 5.12, "learning_rate": 2.4399510874312168e-05, "loss": 0.8197, "step": 58620 }, { "epoch": 5.12, "learning_rate": 2.43951436806708e-05, "loss": 1.0347, "step": 58630 }, { "epoch": 5.12, "learning_rate": 2.4390776487029435e-05, "loss": 0.8145, "step": 58640 }, { "epoch": 5.12, "learning_rate": 2.438640929338807e-05, "loss": 0.9394, "step": 58650 }, { "epoch": 5.12, "learning_rate": 2.4382042099746704e-05, "loss": 0.8833, "step": 58660 }, { "epoch": 5.12, "learning_rate": 2.4377674906105338e-05, "loss": 0.9261, "step": 58670 }, { "epoch": 5.13, "learning_rate": 2.437330771246397e-05, "loss": 0.8974, "step": 58680 }, { "epoch": 5.13, "learning_rate": 2.4368940518822604e-05, "loss": 0.7699, "step": 58690 }, { "epoch": 5.13, "learning_rate": 2.436457332518124e-05, "loss": 0.8848, "step": 58700 }, { "epoch": 5.13, "learning_rate": 2.4360206131539874e-05, "loss": 0.8381, "step": 58710 }, { "epoch": 5.13, "learning_rate": 2.4355838937898507e-05, "loss": 0.8093, "step": 58720 }, { "epoch": 5.13, "learning_rate": 2.4351471744257144e-05, "loss": 0.7148, "step": 58730 }, { "epoch": 5.13, "learning_rate": 2.4347104550615777e-05, "loss": 0.7787, "step": 58740 }, { "epoch": 5.13, "learning_rate": 2.434273735697441e-05, "loss": 0.9336, "step": 58750 }, { "epoch": 5.13, "learning_rate": 2.4338370163333043e-05, "loss": 0.8389, "step": 58760 }, { "epoch": 5.13, "learning_rate": 2.4334002969691676e-05, "loss": 0.8652, "step": 58770 }, { "epoch": 5.13, "learning_rate": 2.432963577605031e-05, "loss": 0.8996, "step": 58780 }, { "epoch": 5.13, "learning_rate": 2.4325268582408943e-05, "loss": 0.8875, "step": 58790 }, { "epoch": 5.14, "learning_rate": 2.432090138876758e-05, "loss": 0.8339, "step": 58800 }, { "epoch": 5.14, "learning_rate": 2.4316534195126213e-05, "loss": 0.827, "step": 58810 }, { "epoch": 5.14, "learning_rate": 2.431216700148485e-05, "loss": 0.9233, "step": 58820 }, { "epoch": 5.14, "learning_rate": 2.4307799807843482e-05, "loss": 0.8001, "step": 58830 }, { "epoch": 5.14, "learning_rate": 2.4303432614202116e-05, "loss": 0.8863, "step": 58840 }, { "epoch": 5.14, "learning_rate": 2.429906542056075e-05, "loss": 0.8792, "step": 58850 }, { "epoch": 5.14, "learning_rate": 2.4294698226919382e-05, "loss": 0.7917, "step": 58860 }, { "epoch": 5.14, "learning_rate": 2.4290331033278015e-05, "loss": 0.9436, "step": 58870 }, { "epoch": 5.14, "learning_rate": 2.428596383963665e-05, "loss": 0.8387, "step": 58880 }, { "epoch": 5.14, "learning_rate": 2.4281596645995285e-05, "loss": 0.9882, "step": 58890 }, { "epoch": 5.14, "learning_rate": 2.4277229452353918e-05, "loss": 0.911, "step": 58900 }, { "epoch": 5.15, "learning_rate": 2.427286225871255e-05, "loss": 0.7689, "step": 58910 }, { "epoch": 5.15, "learning_rate": 2.4268495065071188e-05, "loss": 0.88, "step": 58920 }, { "epoch": 5.15, "learning_rate": 2.426412787142982e-05, "loss": 0.8845, "step": 58930 }, { "epoch": 5.15, "learning_rate": 2.4259760677788454e-05, "loss": 0.8373, "step": 58940 }, { "epoch": 5.15, "learning_rate": 2.4255393484147088e-05, "loss": 0.7994, "step": 58950 }, { "epoch": 5.15, "learning_rate": 2.425102629050572e-05, "loss": 0.958, "step": 58960 }, { "epoch": 5.15, "learning_rate": 2.4246659096864354e-05, "loss": 0.9927, "step": 58970 }, { "epoch": 5.15, "learning_rate": 2.424229190322299e-05, "loss": 0.843, "step": 58980 }, { "epoch": 5.15, "learning_rate": 2.4237924709581624e-05, "loss": 0.9085, "step": 58990 }, { "epoch": 5.15, "learning_rate": 2.4233557515940257e-05, "loss": 0.7482, "step": 59000 }, { "epoch": 5.15, "learning_rate": 2.422919032229889e-05, "loss": 0.9312, "step": 59010 }, { "epoch": 5.16, "learning_rate": 2.4224823128657527e-05, "loss": 0.8014, "step": 59020 }, { "epoch": 5.16, "learning_rate": 2.422045593501616e-05, "loss": 0.8531, "step": 59030 }, { "epoch": 5.16, "learning_rate": 2.4216088741374793e-05, "loss": 0.8746, "step": 59040 }, { "epoch": 5.16, "learning_rate": 2.4211721547733426e-05, "loss": 0.8853, "step": 59050 }, { "epoch": 5.16, "learning_rate": 2.4207354354092063e-05, "loss": 0.7875, "step": 59060 }, { "epoch": 5.16, "learning_rate": 2.4202987160450696e-05, "loss": 0.9577, "step": 59070 }, { "epoch": 5.16, "learning_rate": 2.419861996680933e-05, "loss": 0.9239, "step": 59080 }, { "epoch": 5.16, "learning_rate": 2.4194252773167963e-05, "loss": 0.8257, "step": 59090 }, { "epoch": 5.16, "learning_rate": 2.4189885579526596e-05, "loss": 0.9014, "step": 59100 }, { "epoch": 5.16, "learning_rate": 2.4185518385885232e-05, "loss": 0.9679, "step": 59110 }, { "epoch": 5.16, "learning_rate": 2.4181151192243866e-05, "loss": 0.9363, "step": 59120 }, { "epoch": 5.16, "learning_rate": 2.41767839986025e-05, "loss": 0.7754, "step": 59130 }, { "epoch": 5.17, "learning_rate": 2.4172416804961135e-05, "loss": 0.9016, "step": 59140 }, { "epoch": 5.17, "learning_rate": 2.416804961131977e-05, "loss": 0.9691, "step": 59150 }, { "epoch": 5.17, "learning_rate": 2.4163682417678402e-05, "loss": 0.8615, "step": 59160 }, { "epoch": 5.17, "learning_rate": 2.4159315224037035e-05, "loss": 0.8121, "step": 59170 }, { "epoch": 5.17, "learning_rate": 2.4154948030395668e-05, "loss": 0.9551, "step": 59180 }, { "epoch": 5.17, "learning_rate": 2.41505808367543e-05, "loss": 0.8495, "step": 59190 }, { "epoch": 5.17, "learning_rate": 2.4146213643112935e-05, "loss": 0.8617, "step": 59200 }, { "epoch": 5.17, "learning_rate": 2.414184644947157e-05, "loss": 0.8718, "step": 59210 }, { "epoch": 5.17, "learning_rate": 2.4137479255830204e-05, "loss": 0.9411, "step": 59220 }, { "epoch": 5.17, "learning_rate": 2.413311206218884e-05, "loss": 0.7875, "step": 59230 }, { "epoch": 5.17, "learning_rate": 2.4128744868547474e-05, "loss": 1.0318, "step": 59240 }, { "epoch": 5.18, "learning_rate": 2.4124377674906107e-05, "loss": 0.8475, "step": 59250 }, { "epoch": 5.18, "learning_rate": 2.412001048126474e-05, "loss": 0.8566, "step": 59260 }, { "epoch": 5.18, "learning_rate": 2.4115643287623374e-05, "loss": 0.7631, "step": 59270 }, { "epoch": 5.18, "learning_rate": 2.4111276093982007e-05, "loss": 0.8453, "step": 59280 }, { "epoch": 5.18, "learning_rate": 2.410690890034064e-05, "loss": 0.8106, "step": 59290 }, { "epoch": 5.18, "learning_rate": 2.4102541706699277e-05, "loss": 0.8384, "step": 59300 }, { "epoch": 5.18, "learning_rate": 2.409817451305791e-05, "loss": 0.7496, "step": 59310 }, { "epoch": 5.18, "learning_rate": 2.4093807319416543e-05, "loss": 0.8113, "step": 59320 }, { "epoch": 5.18, "learning_rate": 2.408944012577518e-05, "loss": 0.7603, "step": 59330 }, { "epoch": 5.18, "learning_rate": 2.4085072932133813e-05, "loss": 0.7354, "step": 59340 }, { "epoch": 5.18, "learning_rate": 2.4080705738492446e-05, "loss": 0.8383, "step": 59350 }, { "epoch": 5.18, "learning_rate": 2.407633854485108e-05, "loss": 0.8474, "step": 59360 }, { "epoch": 5.19, "learning_rate": 2.4071971351209713e-05, "loss": 1.0848, "step": 59370 }, { "epoch": 5.19, "learning_rate": 2.4067604157568346e-05, "loss": 0.8542, "step": 59380 }, { "epoch": 5.19, "learning_rate": 2.4063236963926982e-05, "loss": 0.7965, "step": 59390 }, { "epoch": 5.19, "learning_rate": 2.4058869770285616e-05, "loss": 0.9441, "step": 59400 }, { "epoch": 5.19, "learning_rate": 2.405450257664425e-05, "loss": 0.9576, "step": 59410 }, { "epoch": 5.19, "learning_rate": 2.4050135383002882e-05, "loss": 0.8588, "step": 59420 }, { "epoch": 5.19, "learning_rate": 2.404576818936152e-05, "loss": 0.8023, "step": 59430 }, { "epoch": 5.19, "learning_rate": 2.4041400995720152e-05, "loss": 0.881, "step": 59440 }, { "epoch": 5.19, "learning_rate": 2.4037033802078785e-05, "loss": 0.8036, "step": 59450 }, { "epoch": 5.19, "learning_rate": 2.4032666608437418e-05, "loss": 1.0808, "step": 59460 }, { "epoch": 5.19, "learning_rate": 2.4028299414796055e-05, "loss": 0.8737, "step": 59470 }, { "epoch": 5.2, "learning_rate": 2.4023932221154688e-05, "loss": 0.9943, "step": 59480 }, { "epoch": 5.2, "learning_rate": 2.401956502751332e-05, "loss": 0.7869, "step": 59490 }, { "epoch": 5.2, "learning_rate": 2.4015197833871954e-05, "loss": 0.8985, "step": 59500 }, { "epoch": 5.2, "learning_rate": 2.4010830640230588e-05, "loss": 0.883, "step": 59510 }, { "epoch": 5.2, "learning_rate": 2.400646344658922e-05, "loss": 0.9529, "step": 59520 }, { "epoch": 5.2, "learning_rate": 2.4002096252947857e-05, "loss": 0.8628, "step": 59530 }, { "epoch": 5.2, "learning_rate": 2.399772905930649e-05, "loss": 0.8911, "step": 59540 }, { "epoch": 5.2, "learning_rate": 2.3993361865665127e-05, "loss": 0.8692, "step": 59550 }, { "epoch": 5.2, "learning_rate": 2.398899467202376e-05, "loss": 0.8999, "step": 59560 }, { "epoch": 5.2, "learning_rate": 2.3984627478382394e-05, "loss": 0.8174, "step": 59570 }, { "epoch": 5.2, "learning_rate": 2.3980260284741027e-05, "loss": 0.8561, "step": 59580 }, { "epoch": 5.2, "learning_rate": 2.397589309109966e-05, "loss": 0.9083, "step": 59590 }, { "epoch": 5.21, "learning_rate": 2.3971525897458293e-05, "loss": 0.9441, "step": 59600 }, { "epoch": 5.21, "learning_rate": 2.3967158703816926e-05, "loss": 0.9313, "step": 59610 }, { "epoch": 5.21, "learning_rate": 2.396279151017556e-05, "loss": 0.8754, "step": 59620 }, { "epoch": 5.21, "learning_rate": 2.3958424316534196e-05, "loss": 0.8525, "step": 59630 }, { "epoch": 5.21, "learning_rate": 2.3954057122892833e-05, "loss": 0.878, "step": 59640 }, { "epoch": 5.21, "learning_rate": 2.3949689929251466e-05, "loss": 0.8969, "step": 59650 }, { "epoch": 5.21, "learning_rate": 2.39453227356101e-05, "loss": 0.7699, "step": 59660 }, { "epoch": 5.21, "learning_rate": 2.3940955541968732e-05, "loss": 0.7783, "step": 59670 }, { "epoch": 5.21, "learning_rate": 2.3936588348327366e-05, "loss": 0.8133, "step": 59680 }, { "epoch": 5.21, "learning_rate": 2.3932221154686e-05, "loss": 0.8283, "step": 59690 }, { "epoch": 5.21, "learning_rate": 2.3927853961044632e-05, "loss": 0.8817, "step": 59700 }, { "epoch": 5.22, "learning_rate": 2.3923486767403265e-05, "loss": 0.9507, "step": 59710 }, { "epoch": 5.22, "learning_rate": 2.3919119573761902e-05, "loss": 0.8477, "step": 59720 }, { "epoch": 5.22, "learning_rate": 2.3914752380120535e-05, "loss": 0.894, "step": 59730 }, { "epoch": 5.22, "learning_rate": 2.391038518647917e-05, "loss": 0.828, "step": 59740 }, { "epoch": 5.22, "learning_rate": 2.3906017992837805e-05, "loss": 0.9923, "step": 59750 }, { "epoch": 5.22, "learning_rate": 2.3901650799196438e-05, "loss": 0.7591, "step": 59760 }, { "epoch": 5.22, "learning_rate": 2.389728360555507e-05, "loss": 0.9485, "step": 59770 }, { "epoch": 5.22, "learning_rate": 2.3892916411913704e-05, "loss": 0.8773, "step": 59780 }, { "epoch": 5.22, "learning_rate": 2.3888549218272338e-05, "loss": 0.8469, "step": 59790 }, { "epoch": 5.22, "learning_rate": 2.3884182024630974e-05, "loss": 0.9079, "step": 59800 }, { "epoch": 5.22, "learning_rate": 2.3879814830989607e-05, "loss": 0.7617, "step": 59810 }, { "epoch": 5.22, "learning_rate": 2.387544763734824e-05, "loss": 0.8758, "step": 59820 }, { "epoch": 5.23, "learning_rate": 2.3871080443706874e-05, "loss": 0.9029, "step": 59830 }, { "epoch": 5.23, "learning_rate": 2.386671325006551e-05, "loss": 0.8992, "step": 59840 }, { "epoch": 5.23, "learning_rate": 2.3862346056424144e-05, "loss": 0.931, "step": 59850 }, { "epoch": 5.23, "learning_rate": 2.3857978862782777e-05, "loss": 0.8416, "step": 59860 }, { "epoch": 5.23, "learning_rate": 2.385361166914141e-05, "loss": 0.995, "step": 59870 }, { "epoch": 5.23, "learning_rate": 2.3849244475500047e-05, "loss": 0.9017, "step": 59880 }, { "epoch": 5.23, "learning_rate": 2.384487728185868e-05, "loss": 0.8322, "step": 59890 }, { "epoch": 5.23, "learning_rate": 2.3840510088217313e-05, "loss": 0.8566, "step": 59900 }, { "epoch": 5.23, "learning_rate": 2.3836142894575946e-05, "loss": 0.9208, "step": 59910 }, { "epoch": 5.23, "learning_rate": 2.383177570093458e-05, "loss": 0.9584, "step": 59920 }, { "epoch": 5.23, "learning_rate": 2.3827408507293213e-05, "loss": 0.9952, "step": 59930 }, { "epoch": 5.24, "learning_rate": 2.382304131365185e-05, "loss": 0.8791, "step": 59940 }, { "epoch": 5.24, "learning_rate": 2.3818674120010483e-05, "loss": 1.0489, "step": 59950 }, { "epoch": 5.24, "learning_rate": 2.381430692636912e-05, "loss": 0.8538, "step": 59960 }, { "epoch": 5.24, "learning_rate": 2.3809939732727752e-05, "loss": 0.9487, "step": 59970 }, { "epoch": 5.24, "learning_rate": 2.3805572539086385e-05, "loss": 0.9225, "step": 59980 }, { "epoch": 5.24, "learning_rate": 2.380120534544502e-05, "loss": 0.8009, "step": 59990 }, { "epoch": 5.24, "learning_rate": 2.3796838151803652e-05, "loss": 0.892, "step": 60000 }, { "epoch": 5.24, "eval_accuracy": 0.5713864596639481, "eval_loss": 0.8869107365608215, "eval_runtime": 84.1447, "eval_samples_per_second": 120.946, "eval_steps_per_second": 15.129, "step": 60000 }, { "epoch": 5.24, "learning_rate": 2.3792470958162285e-05, "loss": 0.8906, "step": 60010 }, { "epoch": 5.24, "learning_rate": 2.378810376452092e-05, "loss": 0.9047, "step": 60020 }, { "epoch": 5.24, "learning_rate": 2.378373657087955e-05, "loss": 0.8665, "step": 60030 }, { "epoch": 5.24, "learning_rate": 2.3779369377238188e-05, "loss": 0.864, "step": 60040 }, { "epoch": 5.24, "learning_rate": 2.377500218359682e-05, "loss": 0.8686, "step": 60050 }, { "epoch": 5.25, "learning_rate": 2.3770634989955458e-05, "loss": 0.8231, "step": 60060 }, { "epoch": 5.25, "learning_rate": 2.376626779631409e-05, "loss": 0.9234, "step": 60070 }, { "epoch": 5.25, "learning_rate": 2.3761900602672724e-05, "loss": 0.982, "step": 60080 }, { "epoch": 5.25, "learning_rate": 2.3757533409031358e-05, "loss": 0.8452, "step": 60090 }, { "epoch": 5.25, "learning_rate": 2.375316621538999e-05, "loss": 0.9406, "step": 60100 }, { "epoch": 5.25, "learning_rate": 2.3748799021748624e-05, "loss": 0.8198, "step": 60110 }, { "epoch": 5.25, "learning_rate": 2.3744431828107257e-05, "loss": 0.8405, "step": 60120 }, { "epoch": 5.25, "learning_rate": 2.3740064634465894e-05, "loss": 0.8184, "step": 60130 }, { "epoch": 5.25, "learning_rate": 2.3735697440824527e-05, "loss": 0.9176, "step": 60140 }, { "epoch": 5.25, "learning_rate": 2.373133024718316e-05, "loss": 0.8155, "step": 60150 }, { "epoch": 5.25, "learning_rate": 2.3726963053541797e-05, "loss": 0.89, "step": 60160 }, { "epoch": 5.26, "learning_rate": 2.372259585990043e-05, "loss": 0.9043, "step": 60170 }, { "epoch": 5.26, "learning_rate": 2.3718228666259063e-05, "loss": 0.9208, "step": 60180 }, { "epoch": 5.26, "learning_rate": 2.3713861472617696e-05, "loss": 0.9164, "step": 60190 }, { "epoch": 5.26, "learning_rate": 2.370949427897633e-05, "loss": 0.9309, "step": 60200 }, { "epoch": 5.26, "learning_rate": 2.3705127085334966e-05, "loss": 0.8442, "step": 60210 }, { "epoch": 5.26, "learning_rate": 2.37007598916936e-05, "loss": 0.9207, "step": 60220 }, { "epoch": 5.26, "learning_rate": 2.3696392698052233e-05, "loss": 0.9599, "step": 60230 }, { "epoch": 5.26, "learning_rate": 2.3692025504410866e-05, "loss": 0.7562, "step": 60240 }, { "epoch": 5.26, "learning_rate": 2.36876583107695e-05, "loss": 0.8412, "step": 60250 }, { "epoch": 5.26, "learning_rate": 2.3683291117128136e-05, "loss": 0.7926, "step": 60260 }, { "epoch": 5.26, "learning_rate": 2.367892392348677e-05, "loss": 0.8693, "step": 60270 }, { "epoch": 5.27, "learning_rate": 2.3674556729845402e-05, "loss": 0.7364, "step": 60280 }, { "epoch": 5.27, "learning_rate": 2.367018953620404e-05, "loss": 0.7431, "step": 60290 }, { "epoch": 5.27, "learning_rate": 2.3665822342562672e-05, "loss": 0.8966, "step": 60300 }, { "epoch": 5.27, "learning_rate": 2.3661455148921305e-05, "loss": 0.8073, "step": 60310 }, { "epoch": 5.27, "learning_rate": 2.3657087955279938e-05, "loss": 0.9106, "step": 60320 }, { "epoch": 5.27, "learning_rate": 2.365272076163857e-05, "loss": 0.9739, "step": 60330 }, { "epoch": 5.27, "learning_rate": 2.3648353567997205e-05, "loss": 0.8649, "step": 60340 }, { "epoch": 5.27, "learning_rate": 2.3643986374355838e-05, "loss": 0.8504, "step": 60350 }, { "epoch": 5.27, "learning_rate": 2.3639619180714474e-05, "loss": 0.8804, "step": 60360 }, { "epoch": 5.27, "learning_rate": 2.363525198707311e-05, "loss": 0.7936, "step": 60370 }, { "epoch": 5.27, "learning_rate": 2.3630884793431744e-05, "loss": 0.932, "step": 60380 }, { "epoch": 5.27, "learning_rate": 2.3626517599790377e-05, "loss": 0.8602, "step": 60390 }, { "epoch": 5.28, "learning_rate": 2.362215040614901e-05, "loss": 0.7765, "step": 60400 }, { "epoch": 5.28, "learning_rate": 2.3617783212507644e-05, "loss": 0.7819, "step": 60410 }, { "epoch": 5.28, "learning_rate": 2.3613416018866277e-05, "loss": 0.918, "step": 60420 }, { "epoch": 5.28, "learning_rate": 2.360904882522491e-05, "loss": 0.8474, "step": 60430 }, { "epoch": 5.28, "learning_rate": 2.3604681631583543e-05, "loss": 0.9154, "step": 60440 }, { "epoch": 5.28, "learning_rate": 2.3600314437942177e-05, "loss": 1.0553, "step": 60450 }, { "epoch": 5.28, "learning_rate": 2.3595947244300813e-05, "loss": 0.8802, "step": 60460 }, { "epoch": 5.28, "learning_rate": 2.359158005065945e-05, "loss": 0.8639, "step": 60470 }, { "epoch": 5.28, "learning_rate": 2.3587212857018083e-05, "loss": 0.9544, "step": 60480 }, { "epoch": 5.28, "learning_rate": 2.3582845663376716e-05, "loss": 0.9577, "step": 60490 }, { "epoch": 5.28, "learning_rate": 2.357847846973535e-05, "loss": 0.9855, "step": 60500 }, { "epoch": 5.29, "learning_rate": 2.3574111276093983e-05, "loss": 0.9287, "step": 60510 }, { "epoch": 5.29, "learning_rate": 2.3569744082452616e-05, "loss": 0.9079, "step": 60520 }, { "epoch": 5.29, "learning_rate": 2.356537688881125e-05, "loss": 0.9435, "step": 60530 }, { "epoch": 5.29, "learning_rate": 2.3561009695169886e-05, "loss": 0.9269, "step": 60540 }, { "epoch": 5.29, "learning_rate": 2.355664250152852e-05, "loss": 0.8396, "step": 60550 }, { "epoch": 5.29, "learning_rate": 2.3552275307887152e-05, "loss": 0.911, "step": 60560 }, { "epoch": 5.29, "learning_rate": 2.354790811424579e-05, "loss": 0.8311, "step": 60570 }, { "epoch": 5.29, "learning_rate": 2.3543540920604422e-05, "loss": 0.8034, "step": 60580 }, { "epoch": 5.29, "learning_rate": 2.3539173726963055e-05, "loss": 0.9135, "step": 60590 }, { "epoch": 5.29, "learning_rate": 2.3534806533321688e-05, "loss": 0.947, "step": 60600 }, { "epoch": 5.29, "learning_rate": 2.353043933968032e-05, "loss": 0.8906, "step": 60610 }, { "epoch": 5.29, "learning_rate": 2.3526072146038958e-05, "loss": 0.8345, "step": 60620 }, { "epoch": 5.3, "learning_rate": 2.352170495239759e-05, "loss": 0.9343, "step": 60630 }, { "epoch": 5.3, "learning_rate": 2.3517337758756224e-05, "loss": 0.9243, "step": 60640 }, { "epoch": 5.3, "learning_rate": 2.3512970565114858e-05, "loss": 0.9506, "step": 60650 }, { "epoch": 5.3, "learning_rate": 2.350860337147349e-05, "loss": 0.8896, "step": 60660 }, { "epoch": 5.3, "learning_rate": 2.3504236177832127e-05, "loss": 0.8548, "step": 60670 }, { "epoch": 5.3, "learning_rate": 2.349986898419076e-05, "loss": 0.9264, "step": 60680 }, { "epoch": 5.3, "learning_rate": 2.3495501790549394e-05, "loss": 0.7681, "step": 60690 }, { "epoch": 5.3, "learning_rate": 2.349113459690803e-05, "loss": 0.8311, "step": 60700 }, { "epoch": 5.3, "learning_rate": 2.3486767403266664e-05, "loss": 0.8368, "step": 60710 }, { "epoch": 5.3, "learning_rate": 2.3482400209625297e-05, "loss": 0.8974, "step": 60720 }, { "epoch": 5.3, "learning_rate": 2.347803301598393e-05, "loss": 0.8484, "step": 60730 }, { "epoch": 5.31, "learning_rate": 2.3473665822342563e-05, "loss": 0.9743, "step": 60740 }, { "epoch": 5.31, "learning_rate": 2.3469298628701196e-05, "loss": 0.9264, "step": 60750 }, { "epoch": 5.31, "learning_rate": 2.346493143505983e-05, "loss": 0.8345, "step": 60760 }, { "epoch": 5.31, "learning_rate": 2.3460564241418466e-05, "loss": 0.8758, "step": 60770 }, { "epoch": 5.31, "learning_rate": 2.34561970477771e-05, "loss": 0.8639, "step": 60780 }, { "epoch": 5.31, "learning_rate": 2.3451829854135736e-05, "loss": 0.8191, "step": 60790 }, { "epoch": 5.31, "learning_rate": 2.344746266049437e-05, "loss": 0.7968, "step": 60800 }, { "epoch": 5.31, "learning_rate": 2.3443095466853002e-05, "loss": 0.7872, "step": 60810 }, { "epoch": 5.31, "learning_rate": 2.3438728273211636e-05, "loss": 1.0634, "step": 60820 }, { "epoch": 5.31, "learning_rate": 2.343436107957027e-05, "loss": 0.9126, "step": 60830 }, { "epoch": 5.31, "learning_rate": 2.3429993885928902e-05, "loss": 0.8293, "step": 60840 }, { "epoch": 5.31, "learning_rate": 2.3425626692287535e-05, "loss": 0.8325, "step": 60850 }, { "epoch": 5.32, "learning_rate": 2.342125949864617e-05, "loss": 0.8191, "step": 60860 }, { "epoch": 5.32, "learning_rate": 2.3416892305004805e-05, "loss": 0.8012, "step": 60870 }, { "epoch": 5.32, "learning_rate": 2.3412525111363438e-05, "loss": 0.7586, "step": 60880 }, { "epoch": 5.32, "learning_rate": 2.3408157917722075e-05, "loss": 0.9035, "step": 60890 }, { "epoch": 5.32, "learning_rate": 2.3403790724080708e-05, "loss": 0.8659, "step": 60900 }, { "epoch": 5.32, "learning_rate": 2.339942353043934e-05, "loss": 0.7565, "step": 60910 }, { "epoch": 5.32, "learning_rate": 2.3395056336797974e-05, "loss": 0.9067, "step": 60920 }, { "epoch": 5.32, "learning_rate": 2.3390689143156608e-05, "loss": 0.8571, "step": 60930 }, { "epoch": 5.32, "learning_rate": 2.338632194951524e-05, "loss": 0.9648, "step": 60940 }, { "epoch": 5.32, "learning_rate": 2.3381954755873877e-05, "loss": 0.8317, "step": 60950 }, { "epoch": 5.32, "learning_rate": 2.337758756223251e-05, "loss": 0.8899, "step": 60960 }, { "epoch": 5.33, "learning_rate": 2.3373220368591144e-05, "loss": 0.8494, "step": 60970 }, { "epoch": 5.33, "learning_rate": 2.3368853174949777e-05, "loss": 0.9222, "step": 60980 }, { "epoch": 5.33, "learning_rate": 2.3364485981308414e-05, "loss": 0.8521, "step": 60990 }, { "epoch": 5.33, "learning_rate": 2.3360118787667047e-05, "loss": 0.9624, "step": 61000 }, { "epoch": 5.33, "learning_rate": 2.335575159402568e-05, "loss": 0.9342, "step": 61010 }, { "epoch": 5.33, "learning_rate": 2.3351384400384313e-05, "loss": 0.8947, "step": 61020 }, { "epoch": 5.33, "learning_rate": 2.334701720674295e-05, "loss": 0.7487, "step": 61030 }, { "epoch": 5.33, "learning_rate": 2.3342650013101583e-05, "loss": 0.8585, "step": 61040 }, { "epoch": 5.33, "learning_rate": 2.3338282819460216e-05, "loss": 0.9755, "step": 61050 }, { "epoch": 5.33, "learning_rate": 2.333391562581885e-05, "loss": 0.7397, "step": 61060 }, { "epoch": 5.33, "learning_rate": 2.3329548432177483e-05, "loss": 0.8591, "step": 61070 }, { "epoch": 5.33, "learning_rate": 2.3325181238536116e-05, "loss": 0.8191, "step": 61080 }, { "epoch": 5.34, "learning_rate": 2.3320814044894752e-05, "loss": 0.8774, "step": 61090 }, { "epoch": 5.34, "learning_rate": 2.3316446851253386e-05, "loss": 0.7391, "step": 61100 }, { "epoch": 5.34, "learning_rate": 2.3312079657612022e-05, "loss": 0.9264, "step": 61110 }, { "epoch": 5.34, "learning_rate": 2.3307712463970655e-05, "loss": 0.9848, "step": 61120 }, { "epoch": 5.34, "learning_rate": 2.330334527032929e-05, "loss": 0.8896, "step": 61130 }, { "epoch": 5.34, "learning_rate": 2.3298978076687922e-05, "loss": 0.8648, "step": 61140 }, { "epoch": 5.34, "learning_rate": 2.3294610883046555e-05, "loss": 0.8862, "step": 61150 }, { "epoch": 5.34, "learning_rate": 2.3290243689405188e-05, "loss": 0.8555, "step": 61160 }, { "epoch": 5.34, "learning_rate": 2.328587649576382e-05, "loss": 0.8952, "step": 61170 }, { "epoch": 5.34, "learning_rate": 2.3281509302122455e-05, "loss": 0.8382, "step": 61180 }, { "epoch": 5.34, "learning_rate": 2.327714210848109e-05, "loss": 0.8466, "step": 61190 }, { "epoch": 5.35, "learning_rate": 2.3272774914839728e-05, "loss": 0.9042, "step": 61200 }, { "epoch": 5.35, "learning_rate": 2.326840772119836e-05, "loss": 0.8693, "step": 61210 }, { "epoch": 5.35, "learning_rate": 2.3264040527556994e-05, "loss": 0.8766, "step": 61220 }, { "epoch": 5.35, "learning_rate": 2.3259673333915627e-05, "loss": 0.939, "step": 61230 }, { "epoch": 5.35, "learning_rate": 2.325530614027426e-05, "loss": 0.8833, "step": 61240 }, { "epoch": 5.35, "learning_rate": 2.3250938946632894e-05, "loss": 0.8603, "step": 61250 }, { "epoch": 5.35, "learning_rate": 2.3246571752991527e-05, "loss": 0.9255, "step": 61260 }, { "epoch": 5.35, "learning_rate": 2.324220455935016e-05, "loss": 0.8185, "step": 61270 }, { "epoch": 5.35, "learning_rate": 2.3237837365708797e-05, "loss": 1.0374, "step": 61280 }, { "epoch": 5.35, "learning_rate": 2.323347017206743e-05, "loss": 0.8589, "step": 61290 }, { "epoch": 5.35, "learning_rate": 2.3229102978426067e-05, "loss": 0.7981, "step": 61300 }, { "epoch": 5.36, "learning_rate": 2.32247357847847e-05, "loss": 0.8819, "step": 61310 }, { "epoch": 5.36, "learning_rate": 2.3220368591143333e-05, "loss": 0.9141, "step": 61320 }, { "epoch": 5.36, "learning_rate": 2.3216001397501966e-05, "loss": 0.9122, "step": 61330 }, { "epoch": 5.36, "learning_rate": 2.32116342038606e-05, "loss": 0.8845, "step": 61340 }, { "epoch": 5.36, "learning_rate": 2.3207267010219233e-05, "loss": 0.861, "step": 61350 }, { "epoch": 5.36, "learning_rate": 2.320289981657787e-05, "loss": 0.8222, "step": 61360 }, { "epoch": 5.36, "learning_rate": 2.3198532622936502e-05, "loss": 0.8534, "step": 61370 }, { "epoch": 5.36, "learning_rate": 2.3194165429295136e-05, "loss": 0.8301, "step": 61380 }, { "epoch": 5.36, "learning_rate": 2.318979823565377e-05, "loss": 0.8938, "step": 61390 }, { "epoch": 5.36, "learning_rate": 2.3185431042012405e-05, "loss": 1.0291, "step": 61400 }, { "epoch": 5.36, "learning_rate": 2.318106384837104e-05, "loss": 0.7898, "step": 61410 }, { "epoch": 5.36, "learning_rate": 2.3176696654729672e-05, "loss": 0.9057, "step": 61420 }, { "epoch": 5.37, "learning_rate": 2.3172329461088305e-05, "loss": 0.9573, "step": 61430 }, { "epoch": 5.37, "learning_rate": 2.316796226744694e-05, "loss": 0.8495, "step": 61440 }, { "epoch": 5.37, "learning_rate": 2.3163595073805575e-05, "loss": 0.9615, "step": 61450 }, { "epoch": 5.37, "learning_rate": 2.3159227880164208e-05, "loss": 0.7368, "step": 61460 }, { "epoch": 5.37, "learning_rate": 2.315486068652284e-05, "loss": 0.8604, "step": 61470 }, { "epoch": 5.37, "learning_rate": 2.3150493492881474e-05, "loss": 0.8551, "step": 61480 }, { "epoch": 5.37, "learning_rate": 2.3146126299240108e-05, "loss": 1.0133, "step": 61490 }, { "epoch": 5.37, "learning_rate": 2.3141759105598744e-05, "loss": 0.8469, "step": 61500 }, { "epoch": 5.37, "learning_rate": 2.3137391911957377e-05, "loss": 0.9136, "step": 61510 }, { "epoch": 5.37, "learning_rate": 2.3133024718316014e-05, "loss": 0.9993, "step": 61520 }, { "epoch": 5.37, "learning_rate": 2.3128657524674647e-05, "loss": 0.8275, "step": 61530 }, { "epoch": 5.38, "learning_rate": 2.312429033103328e-05, "loss": 0.971, "step": 61540 }, { "epoch": 5.38, "learning_rate": 2.3119923137391914e-05, "loss": 0.9059, "step": 61550 }, { "epoch": 5.38, "learning_rate": 2.3115555943750547e-05, "loss": 0.8323, "step": 61560 }, { "epoch": 5.38, "learning_rate": 2.311118875010918e-05, "loss": 0.8729, "step": 61570 }, { "epoch": 5.38, "learning_rate": 2.3106821556467813e-05, "loss": 1.0056, "step": 61580 }, { "epoch": 5.38, "learning_rate": 2.3102454362826446e-05, "loss": 0.9003, "step": 61590 }, { "epoch": 5.38, "learning_rate": 2.3098087169185083e-05, "loss": 0.9352, "step": 61600 }, { "epoch": 5.38, "learning_rate": 2.3093719975543716e-05, "loss": 0.8518, "step": 61610 }, { "epoch": 5.38, "learning_rate": 2.3089352781902353e-05, "loss": 0.7643, "step": 61620 }, { "epoch": 5.38, "learning_rate": 2.3084985588260986e-05, "loss": 0.8201, "step": 61630 }, { "epoch": 5.38, "learning_rate": 2.308061839461962e-05, "loss": 0.8887, "step": 61640 }, { "epoch": 5.38, "learning_rate": 2.3076251200978252e-05, "loss": 0.9234, "step": 61650 }, { "epoch": 5.39, "learning_rate": 2.3071884007336886e-05, "loss": 0.928, "step": 61660 }, { "epoch": 5.39, "learning_rate": 2.306751681369552e-05, "loss": 0.9616, "step": 61670 }, { "epoch": 5.39, "learning_rate": 2.3063149620054152e-05, "loss": 0.9849, "step": 61680 }, { "epoch": 5.39, "learning_rate": 2.305878242641279e-05, "loss": 0.8608, "step": 61690 }, { "epoch": 5.39, "learning_rate": 2.3054415232771422e-05, "loss": 0.8314, "step": 61700 }, { "epoch": 5.39, "learning_rate": 2.3050048039130055e-05, "loss": 0.9254, "step": 61710 }, { "epoch": 5.39, "learning_rate": 2.304568084548869e-05, "loss": 1.0515, "step": 61720 }, { "epoch": 5.39, "learning_rate": 2.3041313651847325e-05, "loss": 0.8843, "step": 61730 }, { "epoch": 5.39, "learning_rate": 2.3036946458205958e-05, "loss": 0.834, "step": 61740 }, { "epoch": 5.39, "learning_rate": 2.303257926456459e-05, "loss": 0.8613, "step": 61750 }, { "epoch": 5.39, "learning_rate": 2.3028212070923224e-05, "loss": 0.8047, "step": 61760 }, { "epoch": 5.4, "learning_rate": 2.302384487728186e-05, "loss": 0.8804, "step": 61770 }, { "epoch": 5.4, "learning_rate": 2.3019477683640494e-05, "loss": 0.8863, "step": 61780 }, { "epoch": 5.4, "learning_rate": 2.3015110489999127e-05, "loss": 0.7929, "step": 61790 }, { "epoch": 5.4, "learning_rate": 2.301074329635776e-05, "loss": 0.9464, "step": 61800 }, { "epoch": 5.4, "learning_rate": 2.3006376102716394e-05, "loss": 0.756, "step": 61810 }, { "epoch": 5.4, "learning_rate": 2.300200890907503e-05, "loss": 0.8567, "step": 61820 }, { "epoch": 5.4, "learning_rate": 2.2997641715433664e-05, "loss": 0.9511, "step": 61830 }, { "epoch": 5.4, "learning_rate": 2.2993274521792297e-05, "loss": 0.8958, "step": 61840 }, { "epoch": 5.4, "learning_rate": 2.2988907328150933e-05, "loss": 0.8724, "step": 61850 }, { "epoch": 5.4, "learning_rate": 2.2984540134509567e-05, "loss": 0.8964, "step": 61860 }, { "epoch": 5.4, "learning_rate": 2.29801729408682e-05, "loss": 0.8813, "step": 61870 }, { "epoch": 5.4, "learning_rate": 2.2975805747226833e-05, "loss": 0.8728, "step": 61880 }, { "epoch": 5.41, "learning_rate": 2.2971438553585466e-05, "loss": 0.8316, "step": 61890 }, { "epoch": 5.41, "learning_rate": 2.29670713599441e-05, "loss": 0.7869, "step": 61900 }, { "epoch": 5.41, "learning_rate": 2.2962704166302733e-05, "loss": 0.9117, "step": 61910 }, { "epoch": 5.41, "learning_rate": 2.295833697266137e-05, "loss": 0.9262, "step": 61920 }, { "epoch": 5.41, "learning_rate": 2.2953969779020006e-05, "loss": 0.7076, "step": 61930 }, { "epoch": 5.41, "learning_rate": 2.294960258537864e-05, "loss": 0.7781, "step": 61940 }, { "epoch": 5.41, "learning_rate": 2.2945235391737272e-05, "loss": 0.8499, "step": 61950 }, { "epoch": 5.41, "learning_rate": 2.2940868198095905e-05, "loss": 0.9149, "step": 61960 }, { "epoch": 5.41, "learning_rate": 2.293650100445454e-05, "loss": 0.9269, "step": 61970 }, { "epoch": 5.41, "learning_rate": 2.2932133810813172e-05, "loss": 1.0674, "step": 61980 }, { "epoch": 5.41, "learning_rate": 2.2927766617171805e-05, "loss": 0.8558, "step": 61990 }, { "epoch": 5.42, "learning_rate": 2.2923399423530438e-05, "loss": 0.8511, "step": 62000 }, { "epoch": 5.42, "learning_rate": 2.291903222988907e-05, "loss": 0.9049, "step": 62010 }, { "epoch": 5.42, "learning_rate": 2.2914665036247708e-05, "loss": 0.7696, "step": 62020 }, { "epoch": 5.42, "learning_rate": 2.2910297842606345e-05, "loss": 1.0512, "step": 62030 }, { "epoch": 5.42, "learning_rate": 2.2905930648964978e-05, "loss": 0.8702, "step": 62040 }, { "epoch": 5.42, "learning_rate": 2.290156345532361e-05, "loss": 0.9286, "step": 62050 }, { "epoch": 5.42, "learning_rate": 2.2897196261682244e-05, "loss": 0.8731, "step": 62060 }, { "epoch": 5.42, "learning_rate": 2.2892829068040877e-05, "loss": 1.0316, "step": 62070 }, { "epoch": 5.42, "learning_rate": 2.288846187439951e-05, "loss": 0.8574, "step": 62080 }, { "epoch": 5.42, "learning_rate": 2.2884094680758144e-05, "loss": 0.8771, "step": 62090 }, { "epoch": 5.42, "learning_rate": 2.287972748711678e-05, "loss": 0.8703, "step": 62100 }, { "epoch": 5.42, "learning_rate": 2.2875360293475414e-05, "loss": 0.8288, "step": 62110 }, { "epoch": 5.43, "learning_rate": 2.2870993099834047e-05, "loss": 0.975, "step": 62120 }, { "epoch": 5.43, "learning_rate": 2.2866625906192683e-05, "loss": 0.77, "step": 62130 }, { "epoch": 5.43, "learning_rate": 2.2862258712551317e-05, "loss": 0.9322, "step": 62140 }, { "epoch": 5.43, "learning_rate": 2.285789151890995e-05, "loss": 0.9062, "step": 62150 }, { "epoch": 5.43, "learning_rate": 2.2853524325268583e-05, "loss": 0.8176, "step": 62160 }, { "epoch": 5.43, "learning_rate": 2.2849157131627216e-05, "loss": 0.8169, "step": 62170 }, { "epoch": 5.43, "learning_rate": 2.2844789937985853e-05, "loss": 0.8238, "step": 62180 }, { "epoch": 5.43, "learning_rate": 2.2840422744344486e-05, "loss": 0.7545, "step": 62190 }, { "epoch": 5.43, "learning_rate": 2.283605555070312e-05, "loss": 0.8096, "step": 62200 }, { "epoch": 5.43, "learning_rate": 2.2831688357061752e-05, "loss": 0.8209, "step": 62210 }, { "epoch": 5.43, "learning_rate": 2.2827321163420386e-05, "loss": 1.0276, "step": 62220 }, { "epoch": 5.44, "learning_rate": 2.2822953969779022e-05, "loss": 0.8156, "step": 62230 }, { "epoch": 5.44, "learning_rate": 2.2818586776137655e-05, "loss": 0.771, "step": 62240 }, { "epoch": 5.44, "learning_rate": 2.281421958249629e-05, "loss": 0.894, "step": 62250 }, { "epoch": 5.44, "learning_rate": 2.2809852388854925e-05, "loss": 0.8001, "step": 62260 }, { "epoch": 5.44, "learning_rate": 2.280548519521356e-05, "loss": 0.9366, "step": 62270 }, { "epoch": 5.44, "learning_rate": 2.280111800157219e-05, "loss": 0.8319, "step": 62280 }, { "epoch": 5.44, "learning_rate": 2.2796750807930825e-05, "loss": 0.7816, "step": 62290 }, { "epoch": 5.44, "learning_rate": 2.2792383614289458e-05, "loss": 0.9195, "step": 62300 }, { "epoch": 5.44, "learning_rate": 2.278801642064809e-05, "loss": 0.8394, "step": 62310 }, { "epoch": 5.44, "learning_rate": 2.2783649227006724e-05, "loss": 0.9128, "step": 62320 }, { "epoch": 5.44, "learning_rate": 2.277928203336536e-05, "loss": 0.9928, "step": 62330 }, { "epoch": 5.45, "learning_rate": 2.2774914839723994e-05, "loss": 0.7859, "step": 62340 }, { "epoch": 5.45, "learning_rate": 2.277054764608263e-05, "loss": 0.8665, "step": 62350 }, { "epoch": 5.45, "learning_rate": 2.2766180452441264e-05, "loss": 0.8064, "step": 62360 }, { "epoch": 5.45, "learning_rate": 2.2761813258799897e-05, "loss": 0.9346, "step": 62370 }, { "epoch": 5.45, "learning_rate": 2.275744606515853e-05, "loss": 1.0134, "step": 62380 }, { "epoch": 5.45, "learning_rate": 2.2753078871517164e-05, "loss": 0.9045, "step": 62390 }, { "epoch": 5.45, "learning_rate": 2.2748711677875797e-05, "loss": 0.9131, "step": 62400 }, { "epoch": 5.45, "learning_rate": 2.274434448423443e-05, "loss": 0.9657, "step": 62410 }, { "epoch": 5.45, "learning_rate": 2.2739977290593063e-05, "loss": 1.0214, "step": 62420 }, { "epoch": 5.45, "learning_rate": 2.27356100969517e-05, "loss": 0.8186, "step": 62430 }, { "epoch": 5.45, "learning_rate": 2.2731242903310333e-05, "loss": 0.926, "step": 62440 }, { "epoch": 5.45, "learning_rate": 2.272687570966897e-05, "loss": 0.7756, "step": 62450 }, { "epoch": 5.46, "learning_rate": 2.2722508516027603e-05, "loss": 0.8555, "step": 62460 }, { "epoch": 5.46, "learning_rate": 2.2718141322386236e-05, "loss": 0.8956, "step": 62470 }, { "epoch": 5.46, "learning_rate": 2.271377412874487e-05, "loss": 0.8725, "step": 62480 }, { "epoch": 5.46, "learning_rate": 2.2709406935103502e-05, "loss": 0.8216, "step": 62490 }, { "epoch": 5.46, "learning_rate": 2.2705039741462136e-05, "loss": 0.8759, "step": 62500 }, { "epoch": 5.46, "learning_rate": 2.2700672547820772e-05, "loss": 0.9083, "step": 62510 }, { "epoch": 5.46, "learning_rate": 2.2696305354179405e-05, "loss": 0.7467, "step": 62520 }, { "epoch": 5.46, "learning_rate": 2.269193816053804e-05, "loss": 0.8664, "step": 62530 }, { "epoch": 5.46, "learning_rate": 2.2687570966896672e-05, "loss": 0.892, "step": 62540 }, { "epoch": 5.46, "learning_rate": 2.268320377325531e-05, "loss": 0.944, "step": 62550 }, { "epoch": 5.46, "learning_rate": 2.267883657961394e-05, "loss": 0.8286, "step": 62560 }, { "epoch": 5.47, "learning_rate": 2.2674469385972575e-05, "loss": 0.9217, "step": 62570 }, { "epoch": 5.47, "learning_rate": 2.2670102192331208e-05, "loss": 1.0016, "step": 62580 }, { "epoch": 5.47, "learning_rate": 2.2665734998689845e-05, "loss": 0.8701, "step": 62590 }, { "epoch": 5.47, "learning_rate": 2.2661367805048478e-05, "loss": 0.8967, "step": 62600 }, { "epoch": 5.47, "learning_rate": 2.265700061140711e-05, "loss": 0.9651, "step": 62610 }, { "epoch": 5.47, "learning_rate": 2.2652633417765744e-05, "loss": 0.9226, "step": 62620 }, { "epoch": 5.47, "learning_rate": 2.2648266224124377e-05, "loss": 0.8448, "step": 62630 }, { "epoch": 5.47, "learning_rate": 2.2643899030483014e-05, "loss": 0.8969, "step": 62640 }, { "epoch": 5.47, "learning_rate": 2.2639531836841647e-05, "loss": 0.8508, "step": 62650 }, { "epoch": 5.47, "learning_rate": 2.263516464320028e-05, "loss": 0.9134, "step": 62660 }, { "epoch": 5.47, "learning_rate": 2.2630797449558917e-05, "loss": 0.9609, "step": 62670 }, { "epoch": 5.47, "learning_rate": 2.262643025591755e-05, "loss": 0.7924, "step": 62680 }, { "epoch": 5.48, "learning_rate": 2.2622063062276183e-05, "loss": 0.8122, "step": 62690 }, { "epoch": 5.48, "learning_rate": 2.2617695868634817e-05, "loss": 0.8909, "step": 62700 }, { "epoch": 5.48, "learning_rate": 2.261332867499345e-05, "loss": 0.9084, "step": 62710 }, { "epoch": 5.48, "learning_rate": 2.2608961481352083e-05, "loss": 0.7647, "step": 62720 }, { "epoch": 5.48, "learning_rate": 2.2604594287710716e-05, "loss": 0.905, "step": 62730 }, { "epoch": 5.48, "learning_rate": 2.2600227094069353e-05, "loss": 0.84, "step": 62740 }, { "epoch": 5.48, "learning_rate": 2.2595859900427986e-05, "loss": 0.8982, "step": 62750 }, { "epoch": 5.48, "learning_rate": 2.2591492706786623e-05, "loss": 0.7623, "step": 62760 }, { "epoch": 5.48, "learning_rate": 2.2587125513145256e-05, "loss": 0.8727, "step": 62770 }, { "epoch": 5.48, "learning_rate": 2.258275831950389e-05, "loss": 0.8834, "step": 62780 }, { "epoch": 5.48, "learning_rate": 2.2578391125862522e-05, "loss": 0.9649, "step": 62790 }, { "epoch": 5.49, "learning_rate": 2.2574023932221155e-05, "loss": 0.77, "step": 62800 }, { "epoch": 5.49, "learning_rate": 2.256965673857979e-05, "loss": 0.8568, "step": 62810 }, { "epoch": 5.49, "learning_rate": 2.2565289544938422e-05, "loss": 0.7886, "step": 62820 }, { "epoch": 5.49, "learning_rate": 2.2560922351297055e-05, "loss": 0.8803, "step": 62830 }, { "epoch": 5.49, "learning_rate": 2.255655515765569e-05, "loss": 0.8836, "step": 62840 }, { "epoch": 5.49, "learning_rate": 2.2552187964014325e-05, "loss": 0.825, "step": 62850 }, { "epoch": 5.49, "learning_rate": 2.254782077037296e-05, "loss": 0.8008, "step": 62860 }, { "epoch": 5.49, "learning_rate": 2.2543453576731595e-05, "loss": 1.0074, "step": 62870 }, { "epoch": 5.49, "learning_rate": 2.2539086383090228e-05, "loss": 0.8799, "step": 62880 }, { "epoch": 5.49, "learning_rate": 2.253471918944886e-05, "loss": 0.9637, "step": 62890 }, { "epoch": 5.49, "learning_rate": 2.2530351995807494e-05, "loss": 0.8583, "step": 62900 }, { "epoch": 5.49, "learning_rate": 2.2525984802166127e-05, "loss": 0.8125, "step": 62910 }, { "epoch": 5.5, "learning_rate": 2.2521617608524764e-05, "loss": 0.8472, "step": 62920 }, { "epoch": 5.5, "learning_rate": 2.2517250414883397e-05, "loss": 0.8797, "step": 62930 }, { "epoch": 5.5, "learning_rate": 2.251288322124203e-05, "loss": 0.9351, "step": 62940 }, { "epoch": 5.5, "learning_rate": 2.2508516027600664e-05, "loss": 0.8929, "step": 62950 }, { "epoch": 5.5, "learning_rate": 2.25041488339593e-05, "loss": 0.8691, "step": 62960 }, { "epoch": 5.5, "learning_rate": 2.2499781640317933e-05, "loss": 0.9113, "step": 62970 }, { "epoch": 5.5, "learning_rate": 2.2495414446676567e-05, "loss": 0.9404, "step": 62980 }, { "epoch": 5.5, "learning_rate": 2.24910472530352e-05, "loss": 0.923, "step": 62990 }, { "epoch": 5.5, "learning_rate": 2.2486680059393836e-05, "loss": 0.8317, "step": 63000 }, { "epoch": 5.5, "learning_rate": 2.248231286575247e-05, "loss": 0.9828, "step": 63010 }, { "epoch": 5.5, "learning_rate": 2.2477945672111103e-05, "loss": 0.7572, "step": 63020 }, { "epoch": 5.51, "learning_rate": 2.2473578478469736e-05, "loss": 0.9358, "step": 63030 }, { "epoch": 5.51, "learning_rate": 2.246921128482837e-05, "loss": 0.8626, "step": 63040 }, { "epoch": 5.51, "learning_rate": 2.2464844091187002e-05, "loss": 0.8014, "step": 63050 }, { "epoch": 5.51, "learning_rate": 2.246047689754564e-05, "loss": 0.8333, "step": 63060 }, { "epoch": 5.51, "learning_rate": 2.2456109703904272e-05, "loss": 0.8724, "step": 63070 }, { "epoch": 5.51, "learning_rate": 2.2451742510262905e-05, "loss": 0.9305, "step": 63080 }, { "epoch": 5.51, "learning_rate": 2.2447375316621542e-05, "loss": 0.8878, "step": 63090 }, { "epoch": 5.51, "learning_rate": 2.2443008122980175e-05, "loss": 0.9423, "step": 63100 }, { "epoch": 5.51, "learning_rate": 2.243864092933881e-05, "loss": 0.8164, "step": 63110 }, { "epoch": 5.51, "learning_rate": 2.243427373569744e-05, "loss": 1.0882, "step": 63120 }, { "epoch": 5.51, "learning_rate": 2.2429906542056075e-05, "loss": 0.7413, "step": 63130 }, { "epoch": 5.51, "learning_rate": 2.2425539348414708e-05, "loss": 0.9093, "step": 63140 }, { "epoch": 5.52, "learning_rate": 2.242117215477334e-05, "loss": 0.9862, "step": 63150 }, { "epoch": 5.52, "learning_rate": 2.2416804961131978e-05, "loss": 0.914, "step": 63160 }, { "epoch": 5.52, "learning_rate": 2.2412437767490614e-05, "loss": 0.8034, "step": 63170 }, { "epoch": 5.52, "learning_rate": 2.2408070573849248e-05, "loss": 0.8823, "step": 63180 }, { "epoch": 5.52, "learning_rate": 2.240370338020788e-05, "loss": 0.8222, "step": 63190 }, { "epoch": 5.52, "learning_rate": 2.2399336186566514e-05, "loss": 0.9031, "step": 63200 }, { "epoch": 5.52, "learning_rate": 2.2394968992925147e-05, "loss": 0.7749, "step": 63210 }, { "epoch": 5.52, "learning_rate": 2.239060179928378e-05, "loss": 0.8033, "step": 63220 }, { "epoch": 5.52, "learning_rate": 2.2386234605642414e-05, "loss": 0.7913, "step": 63230 }, { "epoch": 5.52, "learning_rate": 2.2381867412001047e-05, "loss": 0.9065, "step": 63240 }, { "epoch": 5.52, "learning_rate": 2.2377500218359683e-05, "loss": 0.8541, "step": 63250 }, { "epoch": 5.53, "learning_rate": 2.2373133024718317e-05, "loss": 0.8518, "step": 63260 }, { "epoch": 5.53, "learning_rate": 2.2368765831076953e-05, "loss": 0.8857, "step": 63270 }, { "epoch": 5.53, "learning_rate": 2.2364398637435586e-05, "loss": 0.8318, "step": 63280 }, { "epoch": 5.53, "learning_rate": 2.236003144379422e-05, "loss": 1.0024, "step": 63290 }, { "epoch": 5.53, "learning_rate": 2.2355664250152853e-05, "loss": 0.7786, "step": 63300 }, { "epoch": 5.53, "learning_rate": 2.2351297056511486e-05, "loss": 0.8588, "step": 63310 }, { "epoch": 5.53, "learning_rate": 2.234692986287012e-05, "loss": 0.9941, "step": 63320 }, { "epoch": 5.53, "learning_rate": 2.2342562669228756e-05, "loss": 0.8487, "step": 63330 }, { "epoch": 5.53, "learning_rate": 2.233819547558739e-05, "loss": 0.8686, "step": 63340 }, { "epoch": 5.53, "learning_rate": 2.2333828281946022e-05, "loss": 0.8572, "step": 63350 }, { "epoch": 5.53, "learning_rate": 2.2329461088304655e-05, "loss": 0.7948, "step": 63360 }, { "epoch": 5.53, "learning_rate": 2.2325093894663292e-05, "loss": 0.7038, "step": 63370 }, { "epoch": 5.54, "learning_rate": 2.2320726701021925e-05, "loss": 0.8556, "step": 63380 }, { "epoch": 5.54, "learning_rate": 2.231635950738056e-05, "loss": 0.8156, "step": 63390 }, { "epoch": 5.54, "learning_rate": 2.231199231373919e-05, "loss": 0.8521, "step": 63400 }, { "epoch": 5.54, "learning_rate": 2.2307625120097828e-05, "loss": 0.8186, "step": 63410 }, { "epoch": 5.54, "learning_rate": 2.230325792645646e-05, "loss": 0.8666, "step": 63420 }, { "epoch": 5.54, "learning_rate": 2.2298890732815095e-05, "loss": 0.809, "step": 63430 }, { "epoch": 5.54, "learning_rate": 2.2294523539173728e-05, "loss": 0.7088, "step": 63440 }, { "epoch": 5.54, "learning_rate": 2.229015634553236e-05, "loss": 0.8293, "step": 63450 }, { "epoch": 5.54, "learning_rate": 2.2285789151890994e-05, "loss": 0.861, "step": 63460 }, { "epoch": 5.54, "learning_rate": 2.228142195824963e-05, "loss": 0.8266, "step": 63470 }, { "epoch": 5.54, "learning_rate": 2.2277054764608264e-05, "loss": 0.8553, "step": 63480 }, { "epoch": 5.55, "learning_rate": 2.2272687570966897e-05, "loss": 0.9269, "step": 63490 }, { "epoch": 5.55, "learning_rate": 2.2268320377325534e-05, "loss": 0.7407, "step": 63500 }, { "epoch": 5.55, "learning_rate": 2.2263953183684167e-05, "loss": 0.832, "step": 63510 }, { "epoch": 5.55, "learning_rate": 2.22595859900428e-05, "loss": 0.9284, "step": 63520 }, { "epoch": 5.55, "learning_rate": 2.2255218796401433e-05, "loss": 0.9114, "step": 63530 }, { "epoch": 5.55, "learning_rate": 2.2250851602760067e-05, "loss": 0.9492, "step": 63540 }, { "epoch": 5.55, "learning_rate": 2.22464844091187e-05, "loss": 1.012, "step": 63550 }, { "epoch": 5.55, "learning_rate": 2.2242117215477333e-05, "loss": 0.9354, "step": 63560 }, { "epoch": 5.55, "learning_rate": 2.223775002183597e-05, "loss": 0.922, "step": 63570 }, { "epoch": 5.55, "learning_rate": 2.2233382828194603e-05, "loss": 0.9302, "step": 63580 }, { "epoch": 5.55, "learning_rate": 2.222901563455324e-05, "loss": 0.8902, "step": 63590 }, { "epoch": 5.56, "learning_rate": 2.2224648440911873e-05, "loss": 0.9281, "step": 63600 }, { "epoch": 5.56, "learning_rate": 2.2220281247270506e-05, "loss": 0.8563, "step": 63610 }, { "epoch": 5.56, "learning_rate": 2.221591405362914e-05, "loss": 0.9125, "step": 63620 }, { "epoch": 5.56, "learning_rate": 2.2211546859987772e-05, "loss": 0.9634, "step": 63630 }, { "epoch": 5.56, "learning_rate": 2.2207179666346405e-05, "loss": 0.8288, "step": 63640 }, { "epoch": 5.56, "learning_rate": 2.220281247270504e-05, "loss": 0.8154, "step": 63650 }, { "epoch": 5.56, "learning_rate": 2.2198445279063675e-05, "loss": 0.9269, "step": 63660 }, { "epoch": 5.56, "learning_rate": 2.219407808542231e-05, "loss": 0.8262, "step": 63670 }, { "epoch": 5.56, "learning_rate": 2.218971089178094e-05, "loss": 1.0144, "step": 63680 }, { "epoch": 5.56, "learning_rate": 2.2185343698139578e-05, "loss": 0.8535, "step": 63690 }, { "epoch": 5.56, "learning_rate": 2.218097650449821e-05, "loss": 0.8169, "step": 63700 }, { "epoch": 5.56, "learning_rate": 2.2176609310856845e-05, "loss": 0.8911, "step": 63710 }, { "epoch": 5.57, "learning_rate": 2.2172242117215478e-05, "loss": 0.9983, "step": 63720 }, { "epoch": 5.57, "learning_rate": 2.216787492357411e-05, "loss": 0.7802, "step": 63730 }, { "epoch": 5.57, "learning_rate": 2.2163507729932748e-05, "loss": 0.8215, "step": 63740 }, { "epoch": 5.57, "learning_rate": 2.215914053629138e-05, "loss": 0.9273, "step": 63750 }, { "epoch": 5.57, "learning_rate": 2.2154773342650014e-05, "loss": 0.9814, "step": 63760 }, { "epoch": 5.57, "learning_rate": 2.2150406149008647e-05, "loss": 0.8087, "step": 63770 }, { "epoch": 5.57, "learning_rate": 2.214603895536728e-05, "loss": 0.9381, "step": 63780 }, { "epoch": 5.57, "learning_rate": 2.2141671761725917e-05, "loss": 0.7944, "step": 63790 }, { "epoch": 5.57, "learning_rate": 2.213730456808455e-05, "loss": 0.8773, "step": 63800 }, { "epoch": 5.57, "learning_rate": 2.2132937374443183e-05, "loss": 0.9309, "step": 63810 }, { "epoch": 5.57, "learning_rate": 2.2128570180801817e-05, "loss": 0.9089, "step": 63820 }, { "epoch": 5.58, "learning_rate": 2.2124202987160453e-05, "loss": 0.9133, "step": 63830 }, { "epoch": 5.58, "learning_rate": 2.2119835793519086e-05, "loss": 0.9092, "step": 63840 }, { "epoch": 5.58, "learning_rate": 2.211546859987772e-05, "loss": 0.9028, "step": 63850 }, { "epoch": 5.58, "learning_rate": 2.2111101406236353e-05, "loss": 0.9693, "step": 63860 }, { "epoch": 5.58, "learning_rate": 2.2106734212594986e-05, "loss": 0.7352, "step": 63870 }, { "epoch": 5.58, "learning_rate": 2.210236701895362e-05, "loss": 0.8754, "step": 63880 }, { "epoch": 5.58, "learning_rate": 2.2097999825312256e-05, "loss": 0.8964, "step": 63890 }, { "epoch": 5.58, "learning_rate": 2.209363263167089e-05, "loss": 0.849, "step": 63900 }, { "epoch": 5.58, "learning_rate": 2.2089265438029526e-05, "loss": 0.876, "step": 63910 }, { "epoch": 5.58, "learning_rate": 2.208489824438816e-05, "loss": 0.9414, "step": 63920 }, { "epoch": 5.58, "learning_rate": 2.2080531050746792e-05, "loss": 0.7806, "step": 63930 }, { "epoch": 5.58, "learning_rate": 2.2076163857105425e-05, "loss": 0.7392, "step": 63940 }, { "epoch": 5.59, "learning_rate": 2.207179666346406e-05, "loss": 1.0219, "step": 63950 }, { "epoch": 5.59, "learning_rate": 2.206742946982269e-05, "loss": 0.958, "step": 63960 }, { "epoch": 5.59, "learning_rate": 2.2063062276181325e-05, "loss": 0.8259, "step": 63970 }, { "epoch": 5.59, "learning_rate": 2.2058695082539958e-05, "loss": 0.8795, "step": 63980 }, { "epoch": 5.59, "learning_rate": 2.2054327888898595e-05, "loss": 0.9092, "step": 63990 }, { "epoch": 5.59, "learning_rate": 2.204996069525723e-05, "loss": 0.8709, "step": 64000 }, { "epoch": 5.59, "learning_rate": 2.2045593501615864e-05, "loss": 0.8405, "step": 64010 }, { "epoch": 5.59, "learning_rate": 2.2041226307974498e-05, "loss": 1.0349, "step": 64020 }, { "epoch": 5.59, "learning_rate": 2.203685911433313e-05, "loss": 0.8632, "step": 64030 }, { "epoch": 5.59, "learning_rate": 2.2032491920691764e-05, "loss": 0.8909, "step": 64040 }, { "epoch": 5.59, "learning_rate": 2.2028124727050397e-05, "loss": 0.7575, "step": 64050 }, { "epoch": 5.6, "learning_rate": 2.202375753340903e-05, "loss": 0.9843, "step": 64060 }, { "epoch": 5.6, "learning_rate": 2.2019390339767667e-05, "loss": 0.8759, "step": 64070 }, { "epoch": 5.6, "learning_rate": 2.20150231461263e-05, "loss": 0.8704, "step": 64080 }, { "epoch": 5.6, "learning_rate": 2.2010655952484933e-05, "loss": 0.9342, "step": 64090 }, { "epoch": 5.6, "learning_rate": 2.200628875884357e-05, "loss": 0.8832, "step": 64100 }, { "epoch": 5.6, "learning_rate": 2.2001921565202203e-05, "loss": 0.9286, "step": 64110 }, { "epoch": 5.6, "learning_rate": 2.1997554371560836e-05, "loss": 0.9214, "step": 64120 }, { "epoch": 5.6, "learning_rate": 2.199318717791947e-05, "loss": 0.8283, "step": 64130 }, { "epoch": 5.6, "learning_rate": 2.1988819984278103e-05, "loss": 0.8577, "step": 64140 }, { "epoch": 5.6, "learning_rate": 2.198445279063674e-05, "loss": 0.8962, "step": 64150 }, { "epoch": 5.6, "learning_rate": 2.1980085596995373e-05, "loss": 0.9533, "step": 64160 }, { "epoch": 5.6, "learning_rate": 2.1975718403354006e-05, "loss": 0.9817, "step": 64170 }, { "epoch": 5.61, "learning_rate": 2.197135120971264e-05, "loss": 0.7328, "step": 64180 }, { "epoch": 5.61, "learning_rate": 2.1966984016071272e-05, "loss": 0.7755, "step": 64190 }, { "epoch": 5.61, "learning_rate": 2.196261682242991e-05, "loss": 0.9004, "step": 64200 }, { "epoch": 5.61, "learning_rate": 2.1958249628788542e-05, "loss": 0.9009, "step": 64210 }, { "epoch": 5.61, "learning_rate": 2.1953882435147175e-05, "loss": 0.8476, "step": 64220 }, { "epoch": 5.61, "learning_rate": 2.194951524150581e-05, "loss": 0.8182, "step": 64230 }, { "epoch": 5.61, "learning_rate": 2.1945148047864445e-05, "loss": 0.8202, "step": 64240 }, { "epoch": 5.61, "learning_rate": 2.1940780854223078e-05, "loss": 0.8086, "step": 64250 }, { "epoch": 5.61, "learning_rate": 2.193641366058171e-05, "loss": 0.8383, "step": 64260 }, { "epoch": 5.61, "learning_rate": 2.1932046466940345e-05, "loss": 0.7566, "step": 64270 }, { "epoch": 5.61, "learning_rate": 2.1927679273298978e-05, "loss": 0.8752, "step": 64280 }, { "epoch": 5.62, "learning_rate": 2.192331207965761e-05, "loss": 1.0063, "step": 64290 }, { "epoch": 5.62, "learning_rate": 2.1918944886016248e-05, "loss": 0.8573, "step": 64300 }, { "epoch": 5.62, "learning_rate": 2.191457769237488e-05, "loss": 0.994, "step": 64310 }, { "epoch": 5.62, "learning_rate": 2.1910210498733517e-05, "loss": 0.9567, "step": 64320 }, { "epoch": 5.62, "learning_rate": 2.190584330509215e-05, "loss": 0.8976, "step": 64330 }, { "epoch": 5.62, "learning_rate": 2.1901476111450784e-05, "loss": 0.882, "step": 64340 }, { "epoch": 5.62, "learning_rate": 2.1897108917809417e-05, "loss": 0.8492, "step": 64350 }, { "epoch": 5.62, "learning_rate": 2.189274172416805e-05, "loss": 0.7911, "step": 64360 }, { "epoch": 5.62, "learning_rate": 2.1888374530526683e-05, "loss": 0.7601, "step": 64370 }, { "epoch": 5.62, "learning_rate": 2.1884007336885317e-05, "loss": 0.8566, "step": 64380 }, { "epoch": 5.62, "learning_rate": 2.187964014324395e-05, "loss": 0.8443, "step": 64390 }, { "epoch": 5.62, "learning_rate": 2.1875272949602586e-05, "loss": 0.829, "step": 64400 }, { "epoch": 5.63, "learning_rate": 2.187090575596122e-05, "loss": 0.8437, "step": 64410 }, { "epoch": 5.63, "learning_rate": 2.1866538562319856e-05, "loss": 0.9181, "step": 64420 }, { "epoch": 5.63, "learning_rate": 2.186217136867849e-05, "loss": 0.7817, "step": 64430 }, { "epoch": 5.63, "learning_rate": 2.1857804175037123e-05, "loss": 0.7985, "step": 64440 }, { "epoch": 5.63, "learning_rate": 2.1853436981395756e-05, "loss": 0.7982, "step": 64450 }, { "epoch": 5.63, "learning_rate": 2.184906978775439e-05, "loss": 0.8877, "step": 64460 }, { "epoch": 5.63, "learning_rate": 2.1844702594113022e-05, "loss": 0.9066, "step": 64470 }, { "epoch": 5.63, "learning_rate": 2.184033540047166e-05, "loss": 0.7868, "step": 64480 }, { "epoch": 5.63, "learning_rate": 2.1835968206830292e-05, "loss": 0.8573, "step": 64490 }, { "epoch": 5.63, "learning_rate": 2.1831601013188925e-05, "loss": 0.9611, "step": 64500 }, { "epoch": 5.63, "learning_rate": 2.182723381954756e-05, "loss": 1.0062, "step": 64510 }, { "epoch": 5.64, "learning_rate": 2.1822866625906195e-05, "loss": 0.8564, "step": 64520 }, { "epoch": 5.64, "learning_rate": 2.1818499432264828e-05, "loss": 0.8229, "step": 64530 }, { "epoch": 5.64, "learning_rate": 2.181413223862346e-05, "loss": 0.724, "step": 64540 }, { "epoch": 5.64, "learning_rate": 2.1809765044982095e-05, "loss": 0.8259, "step": 64550 }, { "epoch": 5.64, "learning_rate": 2.180539785134073e-05, "loss": 0.9387, "step": 64560 }, { "epoch": 5.64, "learning_rate": 2.1801030657699364e-05, "loss": 0.9616, "step": 64570 }, { "epoch": 5.64, "learning_rate": 2.1796663464057998e-05, "loss": 0.8795, "step": 64580 }, { "epoch": 5.64, "learning_rate": 2.179229627041663e-05, "loss": 0.8183, "step": 64590 }, { "epoch": 5.64, "learning_rate": 2.1787929076775264e-05, "loss": 0.869, "step": 64600 }, { "epoch": 5.64, "learning_rate": 2.1783561883133897e-05, "loss": 0.9283, "step": 64610 }, { "epoch": 5.64, "learning_rate": 2.1779194689492534e-05, "loss": 0.8006, "step": 64620 }, { "epoch": 5.65, "learning_rate": 2.1774827495851167e-05, "loss": 0.868, "step": 64630 }, { "epoch": 5.65, "learning_rate": 2.17704603022098e-05, "loss": 0.8505, "step": 64640 }, { "epoch": 5.65, "learning_rate": 2.1766093108568437e-05, "loss": 0.8643, "step": 64650 }, { "epoch": 5.65, "learning_rate": 2.176172591492707e-05, "loss": 0.8946, "step": 64660 }, { "epoch": 5.65, "learning_rate": 2.1757358721285703e-05, "loss": 0.838, "step": 64670 }, { "epoch": 5.65, "learning_rate": 2.1752991527644336e-05, "loss": 0.9319, "step": 64680 }, { "epoch": 5.65, "learning_rate": 2.174862433400297e-05, "loss": 0.9045, "step": 64690 }, { "epoch": 5.65, "learning_rate": 2.1744257140361603e-05, "loss": 0.8476, "step": 64700 }, { "epoch": 5.65, "learning_rate": 2.1739889946720236e-05, "loss": 0.83, "step": 64710 }, { "epoch": 5.65, "learning_rate": 2.1735522753078873e-05, "loss": 0.8918, "step": 64720 }, { "epoch": 5.65, "learning_rate": 2.173115555943751e-05, "loss": 0.8371, "step": 64730 }, { "epoch": 5.65, "learning_rate": 2.1726788365796142e-05, "loss": 0.8261, "step": 64740 }, { "epoch": 5.66, "learning_rate": 2.1722421172154776e-05, "loss": 0.8783, "step": 64750 }, { "epoch": 5.66, "learning_rate": 2.171805397851341e-05, "loss": 0.7524, "step": 64760 }, { "epoch": 5.66, "learning_rate": 2.1713686784872042e-05, "loss": 0.9655, "step": 64770 }, { "epoch": 5.66, "learning_rate": 2.1709319591230675e-05, "loss": 0.9037, "step": 64780 }, { "epoch": 5.66, "learning_rate": 2.170495239758931e-05, "loss": 0.8425, "step": 64790 }, { "epoch": 5.66, "learning_rate": 2.1700585203947942e-05, "loss": 0.828, "step": 64800 }, { "epoch": 5.66, "learning_rate": 2.1696218010306578e-05, "loss": 0.7891, "step": 64810 }, { "epoch": 5.66, "learning_rate": 2.169185081666521e-05, "loss": 0.9482, "step": 64820 }, { "epoch": 5.66, "learning_rate": 2.1687483623023848e-05, "loss": 0.9297, "step": 64830 }, { "epoch": 5.66, "learning_rate": 2.168311642938248e-05, "loss": 0.912, "step": 64840 }, { "epoch": 5.66, "learning_rate": 2.1678749235741114e-05, "loss": 0.793, "step": 64850 }, { "epoch": 5.67, "learning_rate": 2.1674382042099748e-05, "loss": 0.8845, "step": 64860 }, { "epoch": 5.67, "learning_rate": 2.167001484845838e-05, "loss": 0.8505, "step": 64870 }, { "epoch": 5.67, "learning_rate": 2.1665647654817014e-05, "loss": 0.922, "step": 64880 }, { "epoch": 5.67, "learning_rate": 2.166128046117565e-05, "loss": 1.1202, "step": 64890 }, { "epoch": 5.67, "learning_rate": 2.1656913267534284e-05, "loss": 0.906, "step": 64900 }, { "epoch": 5.67, "learning_rate": 2.1652546073892917e-05, "loss": 0.924, "step": 64910 }, { "epoch": 5.67, "learning_rate": 2.164817888025155e-05, "loss": 0.783, "step": 64920 }, { "epoch": 5.67, "learning_rate": 2.1643811686610187e-05, "loss": 0.8571, "step": 64930 }, { "epoch": 5.67, "learning_rate": 2.163944449296882e-05, "loss": 0.9919, "step": 64940 }, { "epoch": 5.67, "learning_rate": 2.1635077299327453e-05, "loss": 0.8771, "step": 64950 }, { "epoch": 5.67, "learning_rate": 2.1630710105686086e-05, "loss": 0.8591, "step": 64960 }, { "epoch": 5.67, "learning_rate": 2.162634291204472e-05, "loss": 0.842, "step": 64970 }, { "epoch": 5.68, "learning_rate": 2.1621975718403356e-05, "loss": 0.806, "step": 64980 }, { "epoch": 5.68, "learning_rate": 2.161760852476199e-05, "loss": 0.8558, "step": 64990 }, { "epoch": 5.68, "learning_rate": 2.1613241331120623e-05, "loss": 1.0117, "step": 65000 }, { "epoch": 5.68, "eval_accuracy": 0.577969932200059, "eval_loss": 0.8835278749465942, "eval_runtime": 84.0293, "eval_samples_per_second": 121.113, "eval_steps_per_second": 15.149, "step": 65000 }, { "epoch": 5.68, "learning_rate": 2.1608874137479256e-05, "loss": 0.8909, "step": 65010 }, { "epoch": 5.68, "learning_rate": 2.160450694383789e-05, "loss": 0.8983, "step": 65020 }, { "epoch": 5.68, "learning_rate": 2.1600139750196526e-05, "loss": 0.8191, "step": 65030 }, { "epoch": 5.68, "learning_rate": 2.159577255655516e-05, "loss": 0.8782, "step": 65040 }, { "epoch": 5.68, "learning_rate": 2.1591405362913792e-05, "loss": 0.8589, "step": 65050 }, { "epoch": 5.68, "learning_rate": 2.158703816927243e-05, "loss": 0.8876, "step": 65060 }, { "epoch": 5.68, "learning_rate": 2.1582670975631062e-05, "loss": 0.8115, "step": 65070 }, { "epoch": 5.68, "learning_rate": 2.1578303781989695e-05, "loss": 0.815, "step": 65080 }, { "epoch": 5.69, "learning_rate": 2.1573936588348328e-05, "loss": 0.8477, "step": 65090 }, { "epoch": 5.69, "learning_rate": 2.156956939470696e-05, "loss": 0.7393, "step": 65100 }, { "epoch": 5.69, "learning_rate": 2.1565202201065595e-05, "loss": 0.7704, "step": 65110 }, { "epoch": 5.69, "learning_rate": 2.1560835007424228e-05, "loss": 0.8224, "step": 65120 }, { "epoch": 5.69, "learning_rate": 2.1556467813782865e-05, "loss": 0.9005, "step": 65130 }, { "epoch": 5.69, "learning_rate": 2.1552100620141498e-05, "loss": 0.8264, "step": 65140 }, { "epoch": 5.69, "learning_rate": 2.1547733426500134e-05, "loss": 0.8641, "step": 65150 }, { "epoch": 5.69, "learning_rate": 2.1543366232858767e-05, "loss": 0.9184, "step": 65160 }, { "epoch": 5.69, "learning_rate": 2.15389990392174e-05, "loss": 0.8693, "step": 65170 }, { "epoch": 5.69, "learning_rate": 2.1534631845576034e-05, "loss": 0.9264, "step": 65180 }, { "epoch": 5.69, "learning_rate": 2.1530264651934667e-05, "loss": 0.9447, "step": 65190 }, { "epoch": 5.69, "learning_rate": 2.15258974582933e-05, "loss": 0.9135, "step": 65200 }, { "epoch": 5.7, "learning_rate": 2.1521530264651934e-05, "loss": 0.796, "step": 65210 }, { "epoch": 5.7, "learning_rate": 2.151716307101057e-05, "loss": 1.0523, "step": 65220 }, { "epoch": 5.7, "learning_rate": 2.1512795877369203e-05, "loss": 0.8829, "step": 65230 }, { "epoch": 5.7, "learning_rate": 2.1508428683727837e-05, "loss": 0.8002, "step": 65240 }, { "epoch": 5.7, "learning_rate": 2.1504061490086473e-05, "loss": 0.8364, "step": 65250 }, { "epoch": 5.7, "learning_rate": 2.1499694296445106e-05, "loss": 0.9061, "step": 65260 }, { "epoch": 5.7, "learning_rate": 2.149532710280374e-05, "loss": 0.8403, "step": 65270 }, { "epoch": 5.7, "learning_rate": 2.1490959909162373e-05, "loss": 0.8214, "step": 65280 }, { "epoch": 5.7, "learning_rate": 2.1486592715521006e-05, "loss": 0.9412, "step": 65290 }, { "epoch": 5.7, "learning_rate": 2.1482225521879643e-05, "loss": 0.843, "step": 65300 }, { "epoch": 5.7, "learning_rate": 2.1477858328238276e-05, "loss": 0.8224, "step": 65310 }, { "epoch": 5.71, "learning_rate": 2.147349113459691e-05, "loss": 0.9392, "step": 65320 }, { "epoch": 5.71, "learning_rate": 2.1469123940955542e-05, "loss": 0.9232, "step": 65330 }, { "epoch": 5.71, "learning_rate": 2.1464756747314175e-05, "loss": 0.8754, "step": 65340 }, { "epoch": 5.71, "learning_rate": 2.1460389553672812e-05, "loss": 0.9426, "step": 65350 }, { "epoch": 5.71, "learning_rate": 2.1456022360031445e-05, "loss": 0.851, "step": 65360 }, { "epoch": 5.71, "learning_rate": 2.145165516639008e-05, "loss": 0.8233, "step": 65370 }, { "epoch": 5.71, "learning_rate": 2.144728797274871e-05, "loss": 0.8663, "step": 65380 }, { "epoch": 5.71, "learning_rate": 2.1442920779107348e-05, "loss": 0.8389, "step": 65390 }, { "epoch": 5.71, "learning_rate": 2.143855358546598e-05, "loss": 0.833, "step": 65400 }, { "epoch": 5.71, "learning_rate": 2.1434186391824615e-05, "loss": 0.8165, "step": 65410 }, { "epoch": 5.71, "learning_rate": 2.1429819198183248e-05, "loss": 0.9324, "step": 65420 }, { "epoch": 5.71, "learning_rate": 2.142545200454188e-05, "loss": 0.9035, "step": 65430 }, { "epoch": 5.72, "learning_rate": 2.1421084810900514e-05, "loss": 0.9587, "step": 65440 }, { "epoch": 5.72, "learning_rate": 2.141671761725915e-05, "loss": 0.8757, "step": 65450 }, { "epoch": 5.72, "learning_rate": 2.1412350423617784e-05, "loss": 0.9012, "step": 65460 }, { "epoch": 5.72, "learning_rate": 2.140798322997642e-05, "loss": 0.772, "step": 65470 }, { "epoch": 5.72, "learning_rate": 2.1403616036335054e-05, "loss": 0.8797, "step": 65480 }, { "epoch": 5.72, "learning_rate": 2.1399248842693687e-05, "loss": 0.8669, "step": 65490 }, { "epoch": 5.72, "learning_rate": 2.139488164905232e-05, "loss": 0.8349, "step": 65500 }, { "epoch": 5.72, "learning_rate": 2.1390514455410953e-05, "loss": 0.9612, "step": 65510 }, { "epoch": 5.72, "learning_rate": 2.1386147261769587e-05, "loss": 0.8409, "step": 65520 }, { "epoch": 5.72, "learning_rate": 2.138178006812822e-05, "loss": 0.8216, "step": 65530 }, { "epoch": 5.72, "learning_rate": 2.1377412874486853e-05, "loss": 0.8343, "step": 65540 }, { "epoch": 5.73, "learning_rate": 2.137304568084549e-05, "loss": 0.8968, "step": 65550 }, { "epoch": 5.73, "learning_rate": 2.1368678487204126e-05, "loss": 0.9506, "step": 65560 }, { "epoch": 5.73, "learning_rate": 2.136431129356276e-05, "loss": 0.8396, "step": 65570 }, { "epoch": 5.73, "learning_rate": 2.1359944099921393e-05, "loss": 0.8944, "step": 65580 }, { "epoch": 5.73, "learning_rate": 2.1355576906280026e-05, "loss": 0.8602, "step": 65590 }, { "epoch": 5.73, "learning_rate": 2.135120971263866e-05, "loss": 0.8203, "step": 65600 }, { "epoch": 5.73, "learning_rate": 2.1346842518997292e-05, "loss": 0.9529, "step": 65610 }, { "epoch": 5.73, "learning_rate": 2.1342475325355925e-05, "loss": 0.8553, "step": 65620 }, { "epoch": 5.73, "learning_rate": 2.1338108131714562e-05, "loss": 0.9203, "step": 65630 }, { "epoch": 5.73, "learning_rate": 2.1333740938073195e-05, "loss": 0.8125, "step": 65640 }, { "epoch": 5.73, "learning_rate": 2.132937374443183e-05, "loss": 0.9116, "step": 65650 }, { "epoch": 5.73, "learning_rate": 2.1325006550790465e-05, "loss": 0.9722, "step": 65660 }, { "epoch": 5.74, "learning_rate": 2.1320639357149098e-05, "loss": 0.9646, "step": 65670 }, { "epoch": 5.74, "learning_rate": 2.131627216350773e-05, "loss": 0.9608, "step": 65680 }, { "epoch": 5.74, "learning_rate": 2.1311904969866365e-05, "loss": 0.9981, "step": 65690 }, { "epoch": 5.74, "learning_rate": 2.1307537776224998e-05, "loss": 0.8182, "step": 65700 }, { "epoch": 5.74, "learning_rate": 2.130317058258363e-05, "loss": 0.8699, "step": 65710 }, { "epoch": 5.74, "learning_rate": 2.1298803388942268e-05, "loss": 0.8541, "step": 65720 }, { "epoch": 5.74, "learning_rate": 2.12944361953009e-05, "loss": 0.7404, "step": 65730 }, { "epoch": 5.74, "learning_rate": 2.1290069001659534e-05, "loss": 0.9496, "step": 65740 }, { "epoch": 5.74, "learning_rate": 2.1285701808018167e-05, "loss": 0.8046, "step": 65750 }, { "epoch": 5.74, "learning_rate": 2.1281334614376804e-05, "loss": 0.9161, "step": 65760 }, { "epoch": 5.74, "learning_rate": 2.1276967420735437e-05, "loss": 0.7536, "step": 65770 }, { "epoch": 5.75, "learning_rate": 2.127260022709407e-05, "loss": 0.8572, "step": 65780 }, { "epoch": 5.75, "learning_rate": 2.1268233033452703e-05, "loss": 0.9704, "step": 65790 }, { "epoch": 5.75, "learning_rate": 2.126386583981134e-05, "loss": 0.8907, "step": 65800 }, { "epoch": 5.75, "learning_rate": 2.1259498646169973e-05, "loss": 0.8778, "step": 65810 }, { "epoch": 5.75, "learning_rate": 2.1255131452528606e-05, "loss": 0.8657, "step": 65820 }, { "epoch": 5.75, "learning_rate": 2.125076425888724e-05, "loss": 0.8605, "step": 65830 }, { "epoch": 5.75, "learning_rate": 2.1246397065245873e-05, "loss": 0.8567, "step": 65840 }, { "epoch": 5.75, "learning_rate": 2.1242029871604506e-05, "loss": 0.8835, "step": 65850 }, { "epoch": 5.75, "learning_rate": 2.1237662677963143e-05, "loss": 0.9156, "step": 65860 }, { "epoch": 5.75, "learning_rate": 2.1233295484321776e-05, "loss": 0.9008, "step": 65870 }, { "epoch": 5.75, "learning_rate": 2.1228928290680412e-05, "loss": 0.7759, "step": 65880 }, { "epoch": 5.76, "learning_rate": 2.1224561097039046e-05, "loss": 0.9554, "step": 65890 }, { "epoch": 5.76, "learning_rate": 2.122019390339768e-05, "loss": 0.7889, "step": 65900 }, { "epoch": 5.76, "learning_rate": 2.1215826709756312e-05, "loss": 0.9377, "step": 65910 }, { "epoch": 5.76, "learning_rate": 2.1211459516114945e-05, "loss": 0.9821, "step": 65920 }, { "epoch": 5.76, "learning_rate": 2.120709232247358e-05, "loss": 0.8364, "step": 65930 }, { "epoch": 5.76, "learning_rate": 2.120272512883221e-05, "loss": 0.9137, "step": 65940 }, { "epoch": 5.76, "learning_rate": 2.1198357935190845e-05, "loss": 0.8533, "step": 65950 }, { "epoch": 5.76, "learning_rate": 2.119399074154948e-05, "loss": 0.8995, "step": 65960 }, { "epoch": 5.76, "learning_rate": 2.1189623547908115e-05, "loss": 0.889, "step": 65970 }, { "epoch": 5.76, "learning_rate": 2.118525635426675e-05, "loss": 0.8504, "step": 65980 }, { "epoch": 5.76, "learning_rate": 2.1180889160625384e-05, "loss": 0.9242, "step": 65990 }, { "epoch": 5.76, "learning_rate": 2.1176521966984018e-05, "loss": 0.8515, "step": 66000 }, { "epoch": 5.77, "learning_rate": 2.117215477334265e-05, "loss": 0.8122, "step": 66010 }, { "epoch": 5.77, "learning_rate": 2.1167787579701284e-05, "loss": 0.9569, "step": 66020 }, { "epoch": 5.77, "learning_rate": 2.1163420386059917e-05, "loss": 0.948, "step": 66030 }, { "epoch": 5.77, "learning_rate": 2.1159053192418554e-05, "loss": 0.8402, "step": 66040 }, { "epoch": 5.77, "learning_rate": 2.1154685998777187e-05, "loss": 0.877, "step": 66050 }, { "epoch": 5.77, "learning_rate": 2.115031880513582e-05, "loss": 0.9045, "step": 66060 }, { "epoch": 5.77, "learning_rate": 2.1145951611494453e-05, "loss": 0.8824, "step": 66070 }, { "epoch": 5.77, "learning_rate": 2.114158441785309e-05, "loss": 0.8222, "step": 66080 }, { "epoch": 5.77, "learning_rate": 2.1137217224211723e-05, "loss": 0.9113, "step": 66090 }, { "epoch": 5.77, "learning_rate": 2.1132850030570356e-05, "loss": 0.9138, "step": 66100 }, { "epoch": 5.77, "learning_rate": 2.112848283692899e-05, "loss": 0.8876, "step": 66110 }, { "epoch": 5.78, "learning_rate": 2.1124115643287623e-05, "loss": 0.8118, "step": 66120 }, { "epoch": 5.78, "learning_rate": 2.111974844964626e-05, "loss": 0.903, "step": 66130 }, { "epoch": 5.78, "learning_rate": 2.1115381256004893e-05, "loss": 0.7364, "step": 66140 }, { "epoch": 5.78, "learning_rate": 2.1111014062363526e-05, "loss": 0.8836, "step": 66150 }, { "epoch": 5.78, "learning_rate": 2.110664686872216e-05, "loss": 0.9428, "step": 66160 }, { "epoch": 5.78, "learning_rate": 2.1102279675080796e-05, "loss": 0.8284, "step": 66170 }, { "epoch": 5.78, "learning_rate": 2.109791248143943e-05, "loss": 0.723, "step": 66180 }, { "epoch": 5.78, "learning_rate": 2.1093545287798062e-05, "loss": 0.8828, "step": 66190 }, { "epoch": 5.78, "learning_rate": 2.1089178094156695e-05, "loss": 0.8729, "step": 66200 }, { "epoch": 5.78, "learning_rate": 2.1084810900515332e-05, "loss": 0.8279, "step": 66210 }, { "epoch": 5.78, "learning_rate": 2.1080443706873965e-05, "loss": 0.919, "step": 66220 }, { "epoch": 5.78, "learning_rate": 2.1076076513232598e-05, "loss": 0.8345, "step": 66230 }, { "epoch": 5.79, "learning_rate": 2.107170931959123e-05, "loss": 0.7839, "step": 66240 }, { "epoch": 5.79, "learning_rate": 2.1067342125949865e-05, "loss": 0.9342, "step": 66250 }, { "epoch": 5.79, "learning_rate": 2.1062974932308498e-05, "loss": 0.8032, "step": 66260 }, { "epoch": 5.79, "learning_rate": 2.1058607738667134e-05, "loss": 0.8325, "step": 66270 }, { "epoch": 5.79, "learning_rate": 2.1054240545025768e-05, "loss": 0.9863, "step": 66280 }, { "epoch": 5.79, "learning_rate": 2.1049873351384404e-05, "loss": 0.847, "step": 66290 }, { "epoch": 5.79, "learning_rate": 2.1045506157743037e-05, "loss": 0.8028, "step": 66300 }, { "epoch": 5.79, "learning_rate": 2.104113896410167e-05, "loss": 0.8895, "step": 66310 }, { "epoch": 5.79, "learning_rate": 2.1036771770460304e-05, "loss": 0.7802, "step": 66320 }, { "epoch": 5.79, "learning_rate": 2.1032404576818937e-05, "loss": 1.0068, "step": 66330 }, { "epoch": 5.79, "learning_rate": 2.102803738317757e-05, "loss": 0.8602, "step": 66340 }, { "epoch": 5.8, "learning_rate": 2.1023670189536203e-05, "loss": 0.9518, "step": 66350 }, { "epoch": 5.8, "learning_rate": 2.1019302995894837e-05, "loss": 0.8684, "step": 66360 }, { "epoch": 5.8, "learning_rate": 2.1014935802253473e-05, "loss": 0.9604, "step": 66370 }, { "epoch": 5.8, "learning_rate": 2.1010568608612106e-05, "loss": 0.8643, "step": 66380 }, { "epoch": 5.8, "learning_rate": 2.1006201414970743e-05, "loss": 0.9044, "step": 66390 }, { "epoch": 5.8, "learning_rate": 2.1001834221329376e-05, "loss": 0.8976, "step": 66400 }, { "epoch": 5.8, "learning_rate": 2.099746702768801e-05, "loss": 0.8857, "step": 66410 }, { "epoch": 5.8, "learning_rate": 2.0993099834046643e-05, "loss": 0.927, "step": 66420 }, { "epoch": 5.8, "learning_rate": 2.0988732640405276e-05, "loss": 0.7316, "step": 66430 }, { "epoch": 5.8, "learning_rate": 2.098436544676391e-05, "loss": 1.0086, "step": 66440 }, { "epoch": 5.8, "learning_rate": 2.0979998253122542e-05, "loss": 0.9598, "step": 66450 }, { "epoch": 5.8, "learning_rate": 2.097563105948118e-05, "loss": 0.9516, "step": 66460 }, { "epoch": 5.81, "learning_rate": 2.0971263865839812e-05, "loss": 0.7899, "step": 66470 }, { "epoch": 5.81, "learning_rate": 2.0966896672198445e-05, "loss": 0.8028, "step": 66480 }, { "epoch": 5.81, "learning_rate": 2.0962529478557082e-05, "loss": 0.8247, "step": 66490 }, { "epoch": 5.81, "learning_rate": 2.0958162284915715e-05, "loss": 0.8829, "step": 66500 }, { "epoch": 5.81, "learning_rate": 2.0953795091274348e-05, "loss": 0.9615, "step": 66510 }, { "epoch": 5.81, "learning_rate": 2.094942789763298e-05, "loss": 0.8542, "step": 66520 }, { "epoch": 5.81, "learning_rate": 2.0945060703991615e-05, "loss": 0.8429, "step": 66530 }, { "epoch": 5.81, "learning_rate": 2.094069351035025e-05, "loss": 0.8463, "step": 66540 }, { "epoch": 5.81, "learning_rate": 2.0936326316708884e-05, "loss": 0.8289, "step": 66550 }, { "epoch": 5.81, "learning_rate": 2.0931959123067518e-05, "loss": 0.8515, "step": 66560 }, { "epoch": 5.81, "learning_rate": 2.092759192942615e-05, "loss": 0.925, "step": 66570 }, { "epoch": 5.82, "learning_rate": 2.0923224735784784e-05, "loss": 0.778, "step": 66580 }, { "epoch": 5.82, "learning_rate": 2.091885754214342e-05, "loss": 0.8521, "step": 66590 }, { "epoch": 5.82, "learning_rate": 2.0914490348502054e-05, "loss": 0.9623, "step": 66600 }, { "epoch": 5.82, "learning_rate": 2.0910123154860687e-05, "loss": 0.8972, "step": 66610 }, { "epoch": 5.82, "learning_rate": 2.0905755961219324e-05, "loss": 0.9787, "step": 66620 }, { "epoch": 5.82, "learning_rate": 2.0901388767577957e-05, "loss": 0.8738, "step": 66630 }, { "epoch": 5.82, "learning_rate": 2.089702157393659e-05, "loss": 0.9201, "step": 66640 }, { "epoch": 5.82, "learning_rate": 2.0892654380295223e-05, "loss": 0.7641, "step": 66650 }, { "epoch": 5.82, "learning_rate": 2.0888287186653856e-05, "loss": 0.9715, "step": 66660 }, { "epoch": 5.82, "learning_rate": 2.088391999301249e-05, "loss": 0.8257, "step": 66670 }, { "epoch": 5.82, "learning_rate": 2.0879552799371123e-05, "loss": 0.8799, "step": 66680 }, { "epoch": 5.82, "learning_rate": 2.087518560572976e-05, "loss": 0.8859, "step": 66690 }, { "epoch": 5.83, "learning_rate": 2.0870818412088396e-05, "loss": 0.9008, "step": 66700 }, { "epoch": 5.83, "learning_rate": 2.086645121844703e-05, "loss": 0.8654, "step": 66710 }, { "epoch": 5.83, "learning_rate": 2.0862084024805662e-05, "loss": 0.9315, "step": 66720 }, { "epoch": 5.83, "learning_rate": 2.0857716831164296e-05, "loss": 0.9429, "step": 66730 }, { "epoch": 5.83, "learning_rate": 2.085334963752293e-05, "loss": 0.9633, "step": 66740 }, { "epoch": 5.83, "learning_rate": 2.0848982443881562e-05, "loss": 0.9307, "step": 66750 }, { "epoch": 5.83, "learning_rate": 2.0844615250240195e-05, "loss": 0.8695, "step": 66760 }, { "epoch": 5.83, "learning_rate": 2.084024805659883e-05, "loss": 0.9057, "step": 66770 }, { "epoch": 5.83, "learning_rate": 2.0835880862957465e-05, "loss": 0.8934, "step": 66780 }, { "epoch": 5.83, "learning_rate": 2.0831513669316098e-05, "loss": 0.7734, "step": 66790 }, { "epoch": 5.83, "learning_rate": 2.0827146475674735e-05, "loss": 1.0339, "step": 66800 }, { "epoch": 5.84, "learning_rate": 2.0822779282033368e-05, "loss": 0.8847, "step": 66810 }, { "epoch": 5.84, "learning_rate": 2.0818412088392e-05, "loss": 0.8607, "step": 66820 }, { "epoch": 5.84, "learning_rate": 2.0814044894750634e-05, "loss": 0.9982, "step": 66830 }, { "epoch": 5.84, "learning_rate": 2.0809677701109268e-05, "loss": 0.8619, "step": 66840 }, { "epoch": 5.84, "learning_rate": 2.08053105074679e-05, "loss": 0.9428, "step": 66850 }, { "epoch": 5.84, "learning_rate": 2.0800943313826534e-05, "loss": 0.8512, "step": 66860 }, { "epoch": 5.84, "learning_rate": 2.079657612018517e-05, "loss": 0.8902, "step": 66870 }, { "epoch": 5.84, "learning_rate": 2.0792208926543804e-05, "loss": 0.9269, "step": 66880 }, { "epoch": 5.84, "learning_rate": 2.0787841732902437e-05, "loss": 0.8597, "step": 66890 }, { "epoch": 5.84, "learning_rate": 2.0783474539261074e-05, "loss": 0.9148, "step": 66900 }, { "epoch": 5.84, "learning_rate": 2.0779107345619707e-05, "loss": 0.8988, "step": 66910 }, { "epoch": 5.85, "learning_rate": 2.077474015197834e-05, "loss": 0.8358, "step": 66920 }, { "epoch": 5.85, "learning_rate": 2.0770372958336973e-05, "loss": 0.9573, "step": 66930 }, { "epoch": 5.85, "learning_rate": 2.0766005764695606e-05, "loss": 0.9684, "step": 66940 }, { "epoch": 5.85, "learning_rate": 2.0761638571054243e-05, "loss": 0.9644, "step": 66950 }, { "epoch": 5.85, "learning_rate": 2.0757271377412876e-05, "loss": 0.8406, "step": 66960 }, { "epoch": 5.85, "learning_rate": 2.075290418377151e-05, "loss": 0.8177, "step": 66970 }, { "epoch": 5.85, "learning_rate": 2.0748536990130143e-05, "loss": 0.9331, "step": 66980 }, { "epoch": 5.85, "learning_rate": 2.0744169796488776e-05, "loss": 0.8129, "step": 66990 }, { "epoch": 5.85, "learning_rate": 2.0739802602847412e-05, "loss": 1.0307, "step": 67000 }, { "epoch": 5.85, "learning_rate": 2.0735435409206046e-05, "loss": 0.9192, "step": 67010 }, { "epoch": 5.85, "learning_rate": 2.073106821556468e-05, "loss": 1.0397, "step": 67020 }, { "epoch": 5.85, "learning_rate": 2.0726701021923315e-05, "loss": 0.8991, "step": 67030 }, { "epoch": 5.86, "learning_rate": 2.072233382828195e-05, "loss": 0.8031, "step": 67040 }, { "epoch": 5.86, "learning_rate": 2.0717966634640582e-05, "loss": 1.0027, "step": 67050 }, { "epoch": 5.86, "learning_rate": 2.0713599440999215e-05, "loss": 0.7178, "step": 67060 }, { "epoch": 5.86, "learning_rate": 2.0709232247357848e-05, "loss": 0.8535, "step": 67070 }, { "epoch": 5.86, "learning_rate": 2.070486505371648e-05, "loss": 0.9027, "step": 67080 }, { "epoch": 5.86, "learning_rate": 2.0700497860075115e-05, "loss": 0.8461, "step": 67090 }, { "epoch": 5.86, "learning_rate": 2.069613066643375e-05, "loss": 0.8407, "step": 67100 }, { "epoch": 5.86, "learning_rate": 2.0691763472792384e-05, "loss": 0.9115, "step": 67110 }, { "epoch": 5.86, "learning_rate": 2.068739627915102e-05, "loss": 0.9102, "step": 67120 }, { "epoch": 5.86, "learning_rate": 2.0683029085509654e-05, "loss": 0.854, "step": 67130 }, { "epoch": 5.86, "learning_rate": 2.0678661891868287e-05, "loss": 0.9116, "step": 67140 }, { "epoch": 5.87, "learning_rate": 2.067429469822692e-05, "loss": 0.9519, "step": 67150 }, { "epoch": 5.87, "learning_rate": 2.0669927504585554e-05, "loss": 0.9068, "step": 67160 }, { "epoch": 5.87, "learning_rate": 2.0665560310944187e-05, "loss": 0.8704, "step": 67170 }, { "epoch": 5.87, "learning_rate": 2.066119311730282e-05, "loss": 0.8197, "step": 67180 }, { "epoch": 5.87, "learning_rate": 2.0656825923661457e-05, "loss": 0.8943, "step": 67190 }, { "epoch": 5.87, "learning_rate": 2.065245873002009e-05, "loss": 0.9484, "step": 67200 }, { "epoch": 5.87, "learning_rate": 2.0648091536378723e-05, "loss": 0.7652, "step": 67210 }, { "epoch": 5.87, "learning_rate": 2.064372434273736e-05, "loss": 0.7839, "step": 67220 }, { "epoch": 5.87, "learning_rate": 2.0639357149095993e-05, "loss": 0.8406, "step": 67230 }, { "epoch": 5.87, "learning_rate": 2.0634989955454626e-05, "loss": 0.9207, "step": 67240 }, { "epoch": 5.87, "learning_rate": 2.063062276181326e-05, "loss": 0.7654, "step": 67250 }, { "epoch": 5.87, "learning_rate": 2.0626255568171893e-05, "loss": 0.9468, "step": 67260 }, { "epoch": 5.88, "learning_rate": 2.0621888374530526e-05, "loss": 0.8815, "step": 67270 }, { "epoch": 5.88, "learning_rate": 2.0617521180889162e-05, "loss": 0.8564, "step": 67280 }, { "epoch": 5.88, "learning_rate": 2.0613153987247796e-05, "loss": 0.8549, "step": 67290 }, { "epoch": 5.88, "learning_rate": 2.060878679360643e-05, "loss": 0.9549, "step": 67300 }, { "epoch": 5.88, "learning_rate": 2.0604419599965062e-05, "loss": 0.8662, "step": 67310 }, { "epoch": 5.88, "learning_rate": 2.06000524063237e-05, "loss": 0.9604, "step": 67320 }, { "epoch": 5.88, "learning_rate": 2.0595685212682332e-05, "loss": 0.8383, "step": 67330 }, { "epoch": 5.88, "learning_rate": 2.0591318019040965e-05, "loss": 0.8593, "step": 67340 }, { "epoch": 5.88, "learning_rate": 2.0586950825399598e-05, "loss": 0.8092, "step": 67350 }, { "epoch": 5.88, "learning_rate": 2.0582583631758235e-05, "loss": 0.7489, "step": 67360 }, { "epoch": 5.88, "learning_rate": 2.0578216438116868e-05, "loss": 0.8368, "step": 67370 }, { "epoch": 5.89, "learning_rate": 2.05738492444755e-05, "loss": 0.9108, "step": 67380 }, { "epoch": 5.89, "learning_rate": 2.0569482050834134e-05, "loss": 1.0016, "step": 67390 }, { "epoch": 5.89, "learning_rate": 2.0565114857192768e-05, "loss": 0.728, "step": 67400 }, { "epoch": 5.89, "learning_rate": 2.05607476635514e-05, "loss": 0.8117, "step": 67410 }, { "epoch": 5.89, "learning_rate": 2.0556380469910037e-05, "loss": 0.7944, "step": 67420 }, { "epoch": 5.89, "learning_rate": 2.055201327626867e-05, "loss": 0.8578, "step": 67430 }, { "epoch": 5.89, "learning_rate": 2.0547646082627307e-05, "loss": 0.8623, "step": 67440 }, { "epoch": 5.89, "learning_rate": 2.054327888898594e-05, "loss": 0.9747, "step": 67450 }, { "epoch": 5.89, "learning_rate": 2.0538911695344574e-05, "loss": 0.8834, "step": 67460 }, { "epoch": 5.89, "learning_rate": 2.0534544501703207e-05, "loss": 0.7967, "step": 67470 }, { "epoch": 5.89, "learning_rate": 2.053017730806184e-05, "loss": 0.8661, "step": 67480 }, { "epoch": 5.89, "learning_rate": 2.0525810114420473e-05, "loss": 0.9423, "step": 67490 }, { "epoch": 5.9, "learning_rate": 2.0521442920779106e-05, "loss": 0.9083, "step": 67500 }, { "epoch": 5.9, "learning_rate": 2.051707572713774e-05, "loss": 0.8896, "step": 67510 }, { "epoch": 5.9, "learning_rate": 2.0512708533496376e-05, "loss": 0.793, "step": 67520 }, { "epoch": 5.9, "learning_rate": 2.0508341339855013e-05, "loss": 0.9266, "step": 67530 }, { "epoch": 5.9, "learning_rate": 2.0503974146213646e-05, "loss": 0.8262, "step": 67540 }, { "epoch": 5.9, "learning_rate": 2.049960695257228e-05, "loss": 0.8291, "step": 67550 }, { "epoch": 5.9, "learning_rate": 2.0495239758930912e-05, "loss": 0.9196, "step": 67560 }, { "epoch": 5.9, "learning_rate": 2.0490872565289546e-05, "loss": 0.9492, "step": 67570 }, { "epoch": 5.9, "learning_rate": 2.048650537164818e-05, "loss": 0.9704, "step": 67580 }, { "epoch": 5.9, "learning_rate": 2.0482138178006812e-05, "loss": 0.8431, "step": 67590 }, { "epoch": 5.9, "learning_rate": 2.0477770984365445e-05, "loss": 0.9295, "step": 67600 }, { "epoch": 5.91, "learning_rate": 2.0473403790724082e-05, "loss": 0.9114, "step": 67610 }, { "epoch": 5.91, "learning_rate": 2.0469036597082715e-05, "loss": 0.9099, "step": 67620 }, { "epoch": 5.91, "learning_rate": 2.046466940344135e-05, "loss": 0.9554, "step": 67630 }, { "epoch": 5.91, "learning_rate": 2.0460302209799985e-05, "loss": 1.0066, "step": 67640 }, { "epoch": 5.91, "learning_rate": 2.0455935016158618e-05, "loss": 0.8418, "step": 67650 }, { "epoch": 5.91, "learning_rate": 2.045156782251725e-05, "loss": 0.8535, "step": 67660 }, { "epoch": 5.91, "learning_rate": 2.0447200628875884e-05, "loss": 0.9418, "step": 67670 }, { "epoch": 5.91, "learning_rate": 2.0442833435234518e-05, "loss": 0.8075, "step": 67680 }, { "epoch": 5.91, "learning_rate": 2.0438466241593154e-05, "loss": 0.7805, "step": 67690 }, { "epoch": 5.91, "learning_rate": 2.0434099047951787e-05, "loss": 0.8782, "step": 67700 }, { "epoch": 5.91, "learning_rate": 2.042973185431042e-05, "loss": 0.9749, "step": 67710 }, { "epoch": 5.91, "learning_rate": 2.0425364660669054e-05, "loss": 0.8497, "step": 67720 }, { "epoch": 5.92, "learning_rate": 2.042099746702769e-05, "loss": 0.8789, "step": 67730 }, { "epoch": 5.92, "learning_rate": 2.0416630273386324e-05, "loss": 0.8991, "step": 67740 }, { "epoch": 5.92, "learning_rate": 2.0412263079744957e-05, "loss": 0.86, "step": 67750 }, { "epoch": 5.92, "learning_rate": 2.040789588610359e-05, "loss": 0.7683, "step": 67760 }, { "epoch": 5.92, "learning_rate": 2.0403528692462227e-05, "loss": 0.9027, "step": 67770 }, { "epoch": 5.92, "learning_rate": 2.039916149882086e-05, "loss": 0.9064, "step": 67780 }, { "epoch": 5.92, "learning_rate": 2.0394794305179493e-05, "loss": 0.8494, "step": 67790 }, { "epoch": 5.92, "learning_rate": 2.0390427111538126e-05, "loss": 0.8587, "step": 67800 }, { "epoch": 5.92, "learning_rate": 2.038605991789676e-05, "loss": 0.8497, "step": 67810 }, { "epoch": 5.92, "learning_rate": 2.0381692724255393e-05, "loss": 0.8436, "step": 67820 }, { "epoch": 5.92, "learning_rate": 2.037732553061403e-05, "loss": 0.997, "step": 67830 }, { "epoch": 5.93, "learning_rate": 2.0372958336972662e-05, "loss": 0.8604, "step": 67840 }, { "epoch": 5.93, "learning_rate": 2.03685911433313e-05, "loss": 0.8028, "step": 67850 }, { "epoch": 5.93, "learning_rate": 2.0364223949689932e-05, "loss": 0.8309, "step": 67860 }, { "epoch": 5.93, "learning_rate": 2.0359856756048565e-05, "loss": 0.8877, "step": 67870 }, { "epoch": 5.93, "learning_rate": 2.03554895624072e-05, "loss": 0.8583, "step": 67880 }, { "epoch": 5.93, "learning_rate": 2.0351122368765832e-05, "loss": 0.8747, "step": 67890 }, { "epoch": 5.93, "learning_rate": 2.0346755175124465e-05, "loss": 1.0406, "step": 67900 }, { "epoch": 5.93, "learning_rate": 2.0342387981483098e-05, "loss": 0.8038, "step": 67910 }, { "epoch": 5.93, "learning_rate": 2.033802078784173e-05, "loss": 0.9278, "step": 67920 }, { "epoch": 5.93, "learning_rate": 2.0333653594200368e-05, "loss": 0.7862, "step": 67930 }, { "epoch": 5.93, "learning_rate": 2.0329286400559e-05, "loss": 0.897, "step": 67940 }, { "epoch": 5.94, "learning_rate": 2.0324919206917638e-05, "loss": 0.972, "step": 67950 }, { "epoch": 5.94, "learning_rate": 2.032055201327627e-05, "loss": 0.841, "step": 67960 }, { "epoch": 5.94, "learning_rate": 2.0316184819634904e-05, "loss": 0.7125, "step": 67970 }, { "epoch": 5.94, "learning_rate": 2.0311817625993537e-05, "loss": 0.9759, "step": 67980 }, { "epoch": 5.94, "learning_rate": 2.030745043235217e-05, "loss": 0.8611, "step": 67990 }, { "epoch": 5.94, "learning_rate": 2.0303083238710804e-05, "loss": 1.0023, "step": 68000 }, { "epoch": 5.94, "learning_rate": 2.0298716045069437e-05, "loss": 0.8159, "step": 68010 }, { "epoch": 5.94, "learning_rate": 2.0294348851428074e-05, "loss": 0.9337, "step": 68020 }, { "epoch": 5.94, "learning_rate": 2.0289981657786707e-05, "loss": 0.935, "step": 68030 }, { "epoch": 5.94, "learning_rate": 2.028561446414534e-05, "loss": 0.9289, "step": 68040 }, { "epoch": 5.94, "learning_rate": 2.0281247270503977e-05, "loss": 0.8937, "step": 68050 }, { "epoch": 5.94, "learning_rate": 2.027688007686261e-05, "loss": 0.9102, "step": 68060 }, { "epoch": 5.95, "learning_rate": 2.0272512883221243e-05, "loss": 0.9083, "step": 68070 }, { "epoch": 5.95, "learning_rate": 2.0268145689579876e-05, "loss": 0.8618, "step": 68080 }, { "epoch": 5.95, "learning_rate": 2.026377849593851e-05, "loss": 0.8071, "step": 68090 }, { "epoch": 5.95, "learning_rate": 2.0259411302297146e-05, "loss": 0.8677, "step": 68100 }, { "epoch": 5.95, "learning_rate": 2.025504410865578e-05, "loss": 0.8563, "step": 68110 }, { "epoch": 5.95, "learning_rate": 2.0250676915014412e-05, "loss": 0.8873, "step": 68120 }, { "epoch": 5.95, "learning_rate": 2.0246309721373046e-05, "loss": 0.9589, "step": 68130 }, { "epoch": 5.95, "learning_rate": 2.024194252773168e-05, "loss": 0.8363, "step": 68140 }, { "epoch": 5.95, "learning_rate": 2.0237575334090315e-05, "loss": 0.9214, "step": 68150 }, { "epoch": 5.95, "learning_rate": 2.023320814044895e-05, "loss": 0.896, "step": 68160 }, { "epoch": 5.95, "learning_rate": 2.0228840946807582e-05, "loss": 0.9569, "step": 68170 }, { "epoch": 5.96, "learning_rate": 2.022447375316622e-05, "loss": 0.8968, "step": 68180 }, { "epoch": 5.96, "learning_rate": 2.022010655952485e-05, "loss": 0.8118, "step": 68190 }, { "epoch": 5.96, "learning_rate": 2.0215739365883485e-05, "loss": 0.7574, "step": 68200 }, { "epoch": 5.96, "learning_rate": 2.0211372172242118e-05, "loss": 0.9586, "step": 68210 }, { "epoch": 5.96, "learning_rate": 2.020700497860075e-05, "loss": 0.9394, "step": 68220 }, { "epoch": 5.96, "learning_rate": 2.0202637784959384e-05, "loss": 0.7614, "step": 68230 }, { "epoch": 5.96, "learning_rate": 2.0198270591318018e-05, "loss": 0.8946, "step": 68240 }, { "epoch": 5.96, "learning_rate": 2.0193903397676654e-05, "loss": 1.0029, "step": 68250 }, { "epoch": 5.96, "learning_rate": 2.018953620403529e-05, "loss": 0.8252, "step": 68260 }, { "epoch": 5.96, "learning_rate": 2.0185169010393924e-05, "loss": 0.8771, "step": 68270 }, { "epoch": 5.96, "learning_rate": 2.0180801816752557e-05, "loss": 0.878, "step": 68280 }, { "epoch": 5.96, "learning_rate": 2.017643462311119e-05, "loss": 0.929, "step": 68290 }, { "epoch": 5.97, "learning_rate": 2.0172067429469824e-05, "loss": 0.9534, "step": 68300 }, { "epoch": 5.97, "learning_rate": 2.0167700235828457e-05, "loss": 0.8416, "step": 68310 }, { "epoch": 5.97, "learning_rate": 2.016333304218709e-05, "loss": 0.9456, "step": 68320 }, { "epoch": 5.97, "learning_rate": 2.0158965848545723e-05, "loss": 0.8353, "step": 68330 }, { "epoch": 5.97, "learning_rate": 2.0154598654904356e-05, "loss": 1.0335, "step": 68340 }, { "epoch": 5.97, "learning_rate": 2.0150231461262993e-05, "loss": 0.8986, "step": 68350 }, { "epoch": 5.97, "learning_rate": 2.014586426762163e-05, "loss": 1.0041, "step": 68360 }, { "epoch": 5.97, "learning_rate": 2.0141497073980263e-05, "loss": 0.9592, "step": 68370 }, { "epoch": 5.97, "learning_rate": 2.0137129880338896e-05, "loss": 0.8557, "step": 68380 }, { "epoch": 5.97, "learning_rate": 2.013276268669753e-05, "loss": 0.8939, "step": 68390 }, { "epoch": 5.97, "learning_rate": 2.0128395493056162e-05, "loss": 0.9988, "step": 68400 }, { "epoch": 5.98, "learning_rate": 2.0124028299414796e-05, "loss": 0.9583, "step": 68410 }, { "epoch": 5.98, "learning_rate": 2.011966110577343e-05, "loss": 0.8992, "step": 68420 }, { "epoch": 5.98, "learning_rate": 2.0115293912132065e-05, "loss": 0.8084, "step": 68430 }, { "epoch": 5.98, "learning_rate": 2.01109267184907e-05, "loss": 0.9886, "step": 68440 }, { "epoch": 5.98, "learning_rate": 2.0106559524849332e-05, "loss": 0.7764, "step": 68450 }, { "epoch": 5.98, "learning_rate": 2.010219233120797e-05, "loss": 0.8561, "step": 68460 }, { "epoch": 5.98, "learning_rate": 2.00978251375666e-05, "loss": 0.8658, "step": 68470 }, { "epoch": 5.98, "learning_rate": 2.0093457943925235e-05, "loss": 0.8182, "step": 68480 }, { "epoch": 5.98, "learning_rate": 2.0089090750283868e-05, "loss": 0.8726, "step": 68490 }, { "epoch": 5.98, "learning_rate": 2.00847235566425e-05, "loss": 0.9074, "step": 68500 }, { "epoch": 5.98, "learning_rate": 2.0080356363001138e-05, "loss": 0.9354, "step": 68510 }, { "epoch": 5.98, "learning_rate": 2.007598916935977e-05, "loss": 1.0501, "step": 68520 }, { "epoch": 5.99, "learning_rate": 2.0071621975718404e-05, "loss": 0.7622, "step": 68530 }, { "epoch": 5.99, "learning_rate": 2.0067254782077037e-05, "loss": 0.9299, "step": 68540 }, { "epoch": 5.99, "learning_rate": 2.006288758843567e-05, "loss": 0.9101, "step": 68550 }, { "epoch": 5.99, "learning_rate": 2.0058520394794307e-05, "loss": 0.7986, "step": 68560 }, { "epoch": 5.99, "learning_rate": 2.005415320115294e-05, "loss": 0.8426, "step": 68570 }, { "epoch": 5.99, "learning_rate": 2.0049786007511574e-05, "loss": 0.8381, "step": 68580 }, { "epoch": 5.99, "learning_rate": 2.004541881387021e-05, "loss": 0.8721, "step": 68590 }, { "epoch": 5.99, "learning_rate": 2.0041051620228843e-05, "loss": 0.9456, "step": 68600 }, { "epoch": 5.99, "learning_rate": 2.0036684426587477e-05, "loss": 1.0116, "step": 68610 }, { "epoch": 5.99, "learning_rate": 2.003231723294611e-05, "loss": 0.9399, "step": 68620 }, { "epoch": 5.99, "learning_rate": 2.0027950039304743e-05, "loss": 0.853, "step": 68630 }, { "epoch": 6.0, "learning_rate": 2.0023582845663376e-05, "loss": 0.876, "step": 68640 }, { "epoch": 6.0, "learning_rate": 2.001921565202201e-05, "loss": 0.8249, "step": 68650 }, { "epoch": 6.0, "learning_rate": 2.0014848458380646e-05, "loss": 0.8488, "step": 68660 }, { "epoch": 6.0, "learning_rate": 2.001048126473928e-05, "loss": 0.8544, "step": 68670 }, { "epoch": 6.0, "learning_rate": 2.0006114071097916e-05, "loss": 0.8857, "step": 68680 }, { "epoch": 6.0, "learning_rate": 2.000174687745655e-05, "loss": 0.8597, "step": 68690 }, { "epoch": 6.0, "learning_rate": 1.9997379683815182e-05, "loss": 0.8726, "step": 68700 }, { "epoch": 6.0, "learning_rate": 1.9993012490173815e-05, "loss": 0.8557, "step": 68710 }, { "epoch": 6.0, "learning_rate": 1.998864529653245e-05, "loss": 0.9142, "step": 68720 }, { "epoch": 6.0, "learning_rate": 1.9984278102891082e-05, "loss": 0.9154, "step": 68730 }, { "epoch": 6.0, "learning_rate": 1.9979910909249715e-05, "loss": 0.9844, "step": 68740 }, { "epoch": 6.0, "learning_rate": 1.9975543715608348e-05, "loss": 0.793, "step": 68750 }, { "epoch": 6.01, "learning_rate": 1.9971176521966985e-05, "loss": 0.986, "step": 68760 }, { "epoch": 6.01, "learning_rate": 1.9966809328325618e-05, "loss": 0.8291, "step": 68770 }, { "epoch": 6.01, "learning_rate": 1.9962442134684255e-05, "loss": 0.9167, "step": 68780 }, { "epoch": 6.01, "learning_rate": 1.9958074941042888e-05, "loss": 0.8435, "step": 68790 }, { "epoch": 6.01, "learning_rate": 1.995370774740152e-05, "loss": 0.8235, "step": 68800 }, { "epoch": 6.01, "learning_rate": 1.9949340553760154e-05, "loss": 0.9693, "step": 68810 }, { "epoch": 6.01, "learning_rate": 1.9944973360118787e-05, "loss": 0.8523, "step": 68820 }, { "epoch": 6.01, "learning_rate": 1.994060616647742e-05, "loss": 0.8525, "step": 68830 }, { "epoch": 6.01, "learning_rate": 1.9936238972836057e-05, "loss": 0.7209, "step": 68840 }, { "epoch": 6.01, "learning_rate": 1.993187177919469e-05, "loss": 0.8145, "step": 68850 }, { "epoch": 6.01, "learning_rate": 1.9927504585553324e-05, "loss": 0.7962, "step": 68860 }, { "epoch": 6.02, "learning_rate": 1.9923137391911957e-05, "loss": 0.8256, "step": 68870 }, { "epoch": 6.02, "learning_rate": 1.9918770198270593e-05, "loss": 0.8003, "step": 68880 }, { "epoch": 6.02, "learning_rate": 1.9914403004629227e-05, "loss": 0.9131, "step": 68890 }, { "epoch": 6.02, "learning_rate": 1.991003581098786e-05, "loss": 0.8167, "step": 68900 }, { "epoch": 6.02, "learning_rate": 1.9905668617346493e-05, "loss": 0.9962, "step": 68910 }, { "epoch": 6.02, "learning_rate": 1.990130142370513e-05, "loss": 0.9617, "step": 68920 }, { "epoch": 6.02, "learning_rate": 1.9896934230063763e-05, "loss": 0.8688, "step": 68930 }, { "epoch": 6.02, "learning_rate": 1.9892567036422396e-05, "loss": 0.7859, "step": 68940 }, { "epoch": 6.02, "learning_rate": 1.988819984278103e-05, "loss": 0.8181, "step": 68950 }, { "epoch": 6.02, "learning_rate": 1.9883832649139662e-05, "loss": 0.9432, "step": 68960 }, { "epoch": 6.02, "learning_rate": 1.9879465455498296e-05, "loss": 0.9034, "step": 68970 }, { "epoch": 6.02, "learning_rate": 1.9875098261856932e-05, "loss": 0.9232, "step": 68980 }, { "epoch": 6.03, "learning_rate": 1.9870731068215565e-05, "loss": 0.9909, "step": 68990 }, { "epoch": 6.03, "learning_rate": 1.9866363874574202e-05, "loss": 0.9979, "step": 69000 }, { "epoch": 6.03, "learning_rate": 1.9861996680932835e-05, "loss": 1.0326, "step": 69010 }, { "epoch": 6.03, "learning_rate": 1.985762948729147e-05, "loss": 0.8295, "step": 69020 }, { "epoch": 6.03, "learning_rate": 1.98532622936501e-05, "loss": 0.8812, "step": 69030 }, { "epoch": 6.03, "learning_rate": 1.9848895100008735e-05, "loss": 0.9626, "step": 69040 }, { "epoch": 6.03, "learning_rate": 1.9844527906367368e-05, "loss": 0.8018, "step": 69050 }, { "epoch": 6.03, "learning_rate": 1.9840160712726e-05, "loss": 0.8952, "step": 69060 }, { "epoch": 6.03, "learning_rate": 1.9835793519084634e-05, "loss": 0.9881, "step": 69070 }, { "epoch": 6.03, "learning_rate": 1.983142632544327e-05, "loss": 0.896, "step": 69080 }, { "epoch": 6.03, "learning_rate": 1.9827059131801908e-05, "loss": 0.861, "step": 69090 }, { "epoch": 6.04, "learning_rate": 1.982269193816054e-05, "loss": 0.7244, "step": 69100 }, { "epoch": 6.04, "learning_rate": 1.9818324744519174e-05, "loss": 0.8356, "step": 69110 }, { "epoch": 6.04, "learning_rate": 1.9813957550877807e-05, "loss": 0.9232, "step": 69120 }, { "epoch": 6.04, "learning_rate": 1.980959035723644e-05, "loss": 0.7408, "step": 69130 }, { "epoch": 6.04, "learning_rate": 1.9805223163595074e-05, "loss": 0.9454, "step": 69140 }, { "epoch": 6.04, "learning_rate": 1.9800855969953707e-05, "loss": 0.89, "step": 69150 }, { "epoch": 6.04, "learning_rate": 1.979648877631234e-05, "loss": 0.9232, "step": 69160 }, { "epoch": 6.04, "learning_rate": 1.9792121582670977e-05, "loss": 0.931, "step": 69170 }, { "epoch": 6.04, "learning_rate": 1.978775438902961e-05, "loss": 0.9144, "step": 69180 }, { "epoch": 6.04, "learning_rate": 1.9783387195388246e-05, "loss": 0.9641, "step": 69190 }, { "epoch": 6.04, "learning_rate": 1.977902000174688e-05, "loss": 0.9481, "step": 69200 }, { "epoch": 6.05, "learning_rate": 1.9774652808105513e-05, "loss": 0.8291, "step": 69210 }, { "epoch": 6.05, "learning_rate": 1.9770285614464146e-05, "loss": 0.8412, "step": 69220 }, { "epoch": 6.05, "learning_rate": 1.976591842082278e-05, "loss": 0.8878, "step": 69230 }, { "epoch": 6.05, "learning_rate": 1.9761551227181412e-05, "loss": 0.8663, "step": 69240 }, { "epoch": 6.05, "learning_rate": 1.975718403354005e-05, "loss": 0.9247, "step": 69250 }, { "epoch": 6.05, "learning_rate": 1.9752816839898682e-05, "loss": 0.842, "step": 69260 }, { "epoch": 6.05, "learning_rate": 1.9748449646257315e-05, "loss": 0.9291, "step": 69270 }, { "epoch": 6.05, "learning_rate": 1.974408245261595e-05, "loss": 0.8929, "step": 69280 }, { "epoch": 6.05, "learning_rate": 1.9739715258974585e-05, "loss": 0.7749, "step": 69290 }, { "epoch": 6.05, "learning_rate": 1.973534806533322e-05, "loss": 0.9608, "step": 69300 }, { "epoch": 6.05, "learning_rate": 1.973098087169185e-05, "loss": 0.9606, "step": 69310 }, { "epoch": 6.05, "learning_rate": 1.9726613678050485e-05, "loss": 0.8037, "step": 69320 }, { "epoch": 6.06, "learning_rate": 1.972224648440912e-05, "loss": 0.8471, "step": 69330 }, { "epoch": 6.06, "learning_rate": 1.9717879290767755e-05, "loss": 0.8469, "step": 69340 }, { "epoch": 6.06, "learning_rate": 1.9713512097126388e-05, "loss": 0.8054, "step": 69350 }, { "epoch": 6.06, "learning_rate": 1.970914490348502e-05, "loss": 0.8394, "step": 69360 }, { "epoch": 6.06, "learning_rate": 1.9704777709843654e-05, "loss": 0.9491, "step": 69370 }, { "epoch": 6.06, "learning_rate": 1.9700410516202287e-05, "loss": 0.9855, "step": 69380 }, { "epoch": 6.06, "learning_rate": 1.9696043322560924e-05, "loss": 0.9493, "step": 69390 }, { "epoch": 6.06, "learning_rate": 1.9691676128919557e-05, "loss": 0.8749, "step": 69400 }, { "epoch": 6.06, "learning_rate": 1.9687308935278194e-05, "loss": 1.0075, "step": 69410 }, { "epoch": 6.06, "learning_rate": 1.9682941741636827e-05, "loss": 0.8835, "step": 69420 }, { "epoch": 6.06, "learning_rate": 1.967857454799546e-05, "loss": 0.8739, "step": 69430 }, { "epoch": 6.07, "learning_rate": 1.9674207354354093e-05, "loss": 0.8637, "step": 69440 }, { "epoch": 6.07, "learning_rate": 1.9669840160712727e-05, "loss": 0.8977, "step": 69450 }, { "epoch": 6.07, "learning_rate": 1.966547296707136e-05, "loss": 0.8879, "step": 69460 }, { "epoch": 6.07, "learning_rate": 1.9661105773429993e-05, "loss": 0.8788, "step": 69470 }, { "epoch": 6.07, "learning_rate": 1.9656738579788626e-05, "loss": 0.9032, "step": 69480 }, { "epoch": 6.07, "learning_rate": 1.9652371386147263e-05, "loss": 0.9666, "step": 69490 }, { "epoch": 6.07, "learning_rate": 1.9648004192505896e-05, "loss": 0.748, "step": 69500 }, { "epoch": 6.07, "learning_rate": 1.9643636998864533e-05, "loss": 0.8258, "step": 69510 }, { "epoch": 6.07, "learning_rate": 1.9639269805223166e-05, "loss": 0.9577, "step": 69520 }, { "epoch": 6.07, "learning_rate": 1.96349026115818e-05, "loss": 0.9315, "step": 69530 }, { "epoch": 6.07, "learning_rate": 1.9630535417940432e-05, "loss": 0.8264, "step": 69540 }, { "epoch": 6.07, "learning_rate": 1.9626168224299065e-05, "loss": 0.7973, "step": 69550 }, { "epoch": 6.08, "learning_rate": 1.96218010306577e-05, "loss": 0.8852, "step": 69560 }, { "epoch": 6.08, "learning_rate": 1.9617433837016332e-05, "loss": 0.7535, "step": 69570 }, { "epoch": 6.08, "learning_rate": 1.961306664337497e-05, "loss": 0.8905, "step": 69580 }, { "epoch": 6.08, "learning_rate": 1.96086994497336e-05, "loss": 0.9265, "step": 69590 }, { "epoch": 6.08, "learning_rate": 1.9604332256092235e-05, "loss": 0.8851, "step": 69600 }, { "epoch": 6.08, "learning_rate": 1.959996506245087e-05, "loss": 0.9216, "step": 69610 }, { "epoch": 6.08, "learning_rate": 1.9595597868809505e-05, "loss": 1.0451, "step": 69620 }, { "epoch": 6.08, "learning_rate": 1.9591230675168138e-05, "loss": 0.8671, "step": 69630 }, { "epoch": 6.08, "learning_rate": 1.958686348152677e-05, "loss": 0.8727, "step": 69640 }, { "epoch": 6.08, "learning_rate": 1.9582496287885404e-05, "loss": 0.8041, "step": 69650 }, { "epoch": 6.08, "learning_rate": 1.957812909424404e-05, "loss": 0.8202, "step": 69660 }, { "epoch": 6.09, "learning_rate": 1.9573761900602674e-05, "loss": 0.8211, "step": 69670 }, { "epoch": 6.09, "learning_rate": 1.9569394706961307e-05, "loss": 1.001, "step": 69680 }, { "epoch": 6.09, "learning_rate": 1.956502751331994e-05, "loss": 0.8733, "step": 69690 }, { "epoch": 6.09, "learning_rate": 1.9560660319678574e-05, "loss": 0.9277, "step": 69700 }, { "epoch": 6.09, "learning_rate": 1.955629312603721e-05, "loss": 0.8795, "step": 69710 }, { "epoch": 6.09, "learning_rate": 1.9551925932395843e-05, "loss": 0.9602, "step": 69720 }, { "epoch": 6.09, "learning_rate": 1.9547558738754477e-05, "loss": 0.9111, "step": 69730 }, { "epoch": 6.09, "learning_rate": 1.9543191545113113e-05, "loss": 0.8344, "step": 69740 }, { "epoch": 6.09, "learning_rate": 1.9538824351471746e-05, "loss": 0.8349, "step": 69750 }, { "epoch": 6.09, "learning_rate": 1.953445715783038e-05, "loss": 0.8396, "step": 69760 }, { "epoch": 6.09, "learning_rate": 1.9530089964189013e-05, "loss": 0.9342, "step": 69770 }, { "epoch": 6.09, "learning_rate": 1.9525722770547646e-05, "loss": 0.7862, "step": 69780 }, { "epoch": 6.1, "learning_rate": 1.952135557690628e-05, "loss": 0.9088, "step": 69790 }, { "epoch": 6.1, "learning_rate": 1.9516988383264916e-05, "loss": 0.9102, "step": 69800 }, { "epoch": 6.1, "learning_rate": 1.951262118962355e-05, "loss": 0.7978, "step": 69810 }, { "epoch": 6.1, "learning_rate": 1.9508253995982186e-05, "loss": 0.9119, "step": 69820 }, { "epoch": 6.1, "learning_rate": 1.950388680234082e-05, "loss": 0.8614, "step": 69830 }, { "epoch": 6.1, "learning_rate": 1.9499519608699452e-05, "loss": 0.8301, "step": 69840 }, { "epoch": 6.1, "learning_rate": 1.9495152415058085e-05, "loss": 0.8654, "step": 69850 }, { "epoch": 6.1, "learning_rate": 1.949078522141672e-05, "loss": 0.9119, "step": 69860 }, { "epoch": 6.1, "learning_rate": 1.948641802777535e-05, "loss": 0.7826, "step": 69870 }, { "epoch": 6.1, "learning_rate": 1.9482050834133985e-05, "loss": 0.9123, "step": 69880 }, { "epoch": 6.1, "learning_rate": 1.9477683640492618e-05, "loss": 0.9048, "step": 69890 }, { "epoch": 6.11, "learning_rate": 1.9473316446851255e-05, "loss": 0.7586, "step": 69900 }, { "epoch": 6.11, "learning_rate": 1.9468949253209888e-05, "loss": 0.8915, "step": 69910 }, { "epoch": 6.11, "learning_rate": 1.9464582059568524e-05, "loss": 0.8396, "step": 69920 }, { "epoch": 6.11, "learning_rate": 1.9460214865927158e-05, "loss": 0.8323, "step": 69930 }, { "epoch": 6.11, "learning_rate": 1.945584767228579e-05, "loss": 0.8805, "step": 69940 }, { "epoch": 6.11, "learning_rate": 1.9451480478644424e-05, "loss": 0.9406, "step": 69950 }, { "epoch": 6.11, "learning_rate": 1.9447113285003057e-05, "loss": 1.0179, "step": 69960 }, { "epoch": 6.11, "learning_rate": 1.944274609136169e-05, "loss": 0.901, "step": 69970 }, { "epoch": 6.11, "learning_rate": 1.9438378897720324e-05, "loss": 0.9752, "step": 69980 }, { "epoch": 6.11, "learning_rate": 1.943401170407896e-05, "loss": 0.7614, "step": 69990 }, { "epoch": 6.11, "learning_rate": 1.9429644510437593e-05, "loss": 0.8814, "step": 70000 }, { "epoch": 6.11, "eval_accuracy": 0.5812125380760539, "eval_loss": 0.8769951462745667, "eval_runtime": 84.05, "eval_samples_per_second": 121.083, "eval_steps_per_second": 15.146, "step": 70000 }, { "epoch": 6.11, "learning_rate": 1.9425277316796227e-05, "loss": 0.8498, "step": 70010 }, { "epoch": 6.12, "learning_rate": 1.9420910123154863e-05, "loss": 0.8991, "step": 70020 }, { "epoch": 6.12, "learning_rate": 1.9416542929513496e-05, "loss": 0.8439, "step": 70030 }, { "epoch": 6.12, "learning_rate": 1.941217573587213e-05, "loss": 0.9757, "step": 70040 }, { "epoch": 6.12, "learning_rate": 1.9407808542230763e-05, "loss": 0.7676, "step": 70050 }, { "epoch": 6.12, "learning_rate": 1.9403441348589396e-05, "loss": 0.7057, "step": 70060 }, { "epoch": 6.12, "learning_rate": 1.9399074154948033e-05, "loss": 0.8181, "step": 70070 }, { "epoch": 6.12, "learning_rate": 1.9394706961306666e-05, "loss": 0.8511, "step": 70080 }, { "epoch": 6.12, "learning_rate": 1.93903397676653e-05, "loss": 0.9156, "step": 70090 }, { "epoch": 6.12, "learning_rate": 1.9385972574023932e-05, "loss": 0.8684, "step": 70100 }, { "epoch": 6.12, "learning_rate": 1.9381605380382565e-05, "loss": 0.7832, "step": 70110 }, { "epoch": 6.12, "learning_rate": 1.9377238186741202e-05, "loss": 0.905, "step": 70120 }, { "epoch": 6.13, "learning_rate": 1.9372870993099835e-05, "loss": 0.8566, "step": 70130 }, { "epoch": 6.13, "learning_rate": 1.936850379945847e-05, "loss": 0.9473, "step": 70140 }, { "epoch": 6.13, "learning_rate": 1.9364136605817105e-05, "loss": 0.8671, "step": 70150 }, { "epoch": 6.13, "learning_rate": 1.9359769412175738e-05, "loss": 0.8088, "step": 70160 }, { "epoch": 6.13, "learning_rate": 1.935540221853437e-05, "loss": 0.7692, "step": 70170 }, { "epoch": 6.13, "learning_rate": 1.9351035024893005e-05, "loss": 0.8393, "step": 70180 }, { "epoch": 6.13, "learning_rate": 1.9346667831251638e-05, "loss": 0.8832, "step": 70190 }, { "epoch": 6.13, "learning_rate": 1.934230063761027e-05, "loss": 0.7916, "step": 70200 }, { "epoch": 6.13, "learning_rate": 1.9337933443968904e-05, "loss": 0.802, "step": 70210 }, { "epoch": 6.13, "learning_rate": 1.933356625032754e-05, "loss": 0.947, "step": 70220 }, { "epoch": 6.13, "learning_rate": 1.9329199056686174e-05, "loss": 0.9078, "step": 70230 }, { "epoch": 6.14, "learning_rate": 1.932483186304481e-05, "loss": 0.7956, "step": 70240 }, { "epoch": 6.14, "learning_rate": 1.9320464669403444e-05, "loss": 0.7777, "step": 70250 }, { "epoch": 6.14, "learning_rate": 1.9316097475762077e-05, "loss": 0.8716, "step": 70260 }, { "epoch": 6.14, "learning_rate": 1.931173028212071e-05, "loss": 0.8126, "step": 70270 }, { "epoch": 6.14, "learning_rate": 1.9307363088479344e-05, "loss": 0.8563, "step": 70280 }, { "epoch": 6.14, "learning_rate": 1.9302995894837977e-05, "loss": 0.8884, "step": 70290 }, { "epoch": 6.14, "learning_rate": 1.929862870119661e-05, "loss": 0.8525, "step": 70300 }, { "epoch": 6.14, "learning_rate": 1.9294261507555243e-05, "loss": 0.6437, "step": 70310 }, { "epoch": 6.14, "learning_rate": 1.928989431391388e-05, "loss": 0.8547, "step": 70320 }, { "epoch": 6.14, "learning_rate": 1.9285527120272516e-05, "loss": 0.974, "step": 70330 }, { "epoch": 6.14, "learning_rate": 1.928115992663115e-05, "loss": 0.8762, "step": 70340 }, { "epoch": 6.14, "learning_rate": 1.9276792732989783e-05, "loss": 0.8728, "step": 70350 }, { "epoch": 6.15, "learning_rate": 1.9272425539348416e-05, "loss": 0.867, "step": 70360 }, { "epoch": 6.15, "learning_rate": 1.926805834570705e-05, "loss": 0.9729, "step": 70370 }, { "epoch": 6.15, "learning_rate": 1.9263691152065682e-05, "loss": 0.9126, "step": 70380 }, { "epoch": 6.15, "learning_rate": 1.9259323958424316e-05, "loss": 0.9026, "step": 70390 }, { "epoch": 6.15, "learning_rate": 1.9254956764782952e-05, "loss": 0.9176, "step": 70400 }, { "epoch": 6.15, "learning_rate": 1.9250589571141585e-05, "loss": 0.7294, "step": 70410 }, { "epoch": 6.15, "learning_rate": 1.924622237750022e-05, "loss": 0.8472, "step": 70420 }, { "epoch": 6.15, "learning_rate": 1.9241855183858855e-05, "loss": 0.9356, "step": 70430 }, { "epoch": 6.15, "learning_rate": 1.9237487990217488e-05, "loss": 0.869, "step": 70440 }, { "epoch": 6.15, "learning_rate": 1.923312079657612e-05, "loss": 0.915, "step": 70450 }, { "epoch": 6.15, "learning_rate": 1.9228753602934755e-05, "loss": 0.8813, "step": 70460 }, { "epoch": 6.16, "learning_rate": 1.9224386409293388e-05, "loss": 0.7442, "step": 70470 }, { "epoch": 6.16, "learning_rate": 1.9220019215652025e-05, "loss": 0.8581, "step": 70480 }, { "epoch": 6.16, "learning_rate": 1.9215652022010658e-05, "loss": 0.8677, "step": 70490 }, { "epoch": 6.16, "learning_rate": 1.921128482836929e-05, "loss": 0.9229, "step": 70500 }, { "epoch": 6.16, "learning_rate": 1.9206917634727924e-05, "loss": 0.9982, "step": 70510 }, { "epoch": 6.16, "learning_rate": 1.9202550441086557e-05, "loss": 0.8943, "step": 70520 }, { "epoch": 6.16, "learning_rate": 1.9198183247445194e-05, "loss": 0.8628, "step": 70530 }, { "epoch": 6.16, "learning_rate": 1.9193816053803827e-05, "loss": 0.8891, "step": 70540 }, { "epoch": 6.16, "learning_rate": 1.918944886016246e-05, "loss": 0.8481, "step": 70550 }, { "epoch": 6.16, "learning_rate": 1.9185081666521097e-05, "loss": 0.8581, "step": 70560 }, { "epoch": 6.16, "learning_rate": 1.918071447287973e-05, "loss": 0.8054, "step": 70570 }, { "epoch": 6.16, "learning_rate": 1.9176347279238363e-05, "loss": 0.8904, "step": 70580 }, { "epoch": 6.17, "learning_rate": 1.9171980085596997e-05, "loss": 0.9202, "step": 70590 }, { "epoch": 6.17, "learning_rate": 1.916761289195563e-05, "loss": 0.9063, "step": 70600 }, { "epoch": 6.17, "learning_rate": 1.9163245698314263e-05, "loss": 0.8588, "step": 70610 }, { "epoch": 6.17, "learning_rate": 1.9158878504672896e-05, "loss": 0.9659, "step": 70620 }, { "epoch": 6.17, "learning_rate": 1.9154511311031533e-05, "loss": 0.8758, "step": 70630 }, { "epoch": 6.17, "learning_rate": 1.9150144117390166e-05, "loss": 0.917, "step": 70640 }, { "epoch": 6.17, "learning_rate": 1.9145776923748803e-05, "loss": 0.9229, "step": 70650 }, { "epoch": 6.17, "learning_rate": 1.9141409730107436e-05, "loss": 0.8907, "step": 70660 }, { "epoch": 6.17, "learning_rate": 1.913704253646607e-05, "loss": 0.8604, "step": 70670 }, { "epoch": 6.17, "learning_rate": 1.9132675342824702e-05, "loss": 0.7954, "step": 70680 }, { "epoch": 6.17, "learning_rate": 1.9128308149183335e-05, "loss": 0.9043, "step": 70690 }, { "epoch": 6.18, "learning_rate": 1.912394095554197e-05, "loss": 0.8808, "step": 70700 }, { "epoch": 6.18, "learning_rate": 1.9119573761900602e-05, "loss": 0.8596, "step": 70710 }, { "epoch": 6.18, "learning_rate": 1.9115206568259235e-05, "loss": 0.8394, "step": 70720 }, { "epoch": 6.18, "learning_rate": 1.911083937461787e-05, "loss": 0.9052, "step": 70730 }, { "epoch": 6.18, "learning_rate": 1.9106472180976505e-05, "loss": 0.8428, "step": 70740 }, { "epoch": 6.18, "learning_rate": 1.910210498733514e-05, "loss": 0.9675, "step": 70750 }, { "epoch": 6.18, "learning_rate": 1.9097737793693775e-05, "loss": 0.8523, "step": 70760 }, { "epoch": 6.18, "learning_rate": 1.9093370600052408e-05, "loss": 0.8443, "step": 70770 }, { "epoch": 6.18, "learning_rate": 1.908900340641104e-05, "loss": 0.8346, "step": 70780 }, { "epoch": 6.18, "learning_rate": 1.9084636212769674e-05, "loss": 0.9896, "step": 70790 }, { "epoch": 6.18, "learning_rate": 1.9080269019128307e-05, "loss": 0.8894, "step": 70800 }, { "epoch": 6.18, "learning_rate": 1.9075901825486944e-05, "loss": 0.7918, "step": 70810 }, { "epoch": 6.19, "learning_rate": 1.9071534631845577e-05, "loss": 0.9251, "step": 70820 }, { "epoch": 6.19, "learning_rate": 1.906716743820421e-05, "loss": 0.8719, "step": 70830 }, { "epoch": 6.19, "learning_rate": 1.9062800244562844e-05, "loss": 0.926, "step": 70840 }, { "epoch": 6.19, "learning_rate": 1.905843305092148e-05, "loss": 0.8264, "step": 70850 }, { "epoch": 6.19, "learning_rate": 1.9054065857280113e-05, "loss": 0.94, "step": 70860 }, { "epoch": 6.19, "learning_rate": 1.9049698663638747e-05, "loss": 0.853, "step": 70870 }, { "epoch": 6.19, "learning_rate": 1.904533146999738e-05, "loss": 0.9861, "step": 70880 }, { "epoch": 6.19, "learning_rate": 1.9040964276356016e-05, "loss": 0.9068, "step": 70890 }, { "epoch": 6.19, "learning_rate": 1.903659708271465e-05, "loss": 0.899, "step": 70900 }, { "epoch": 6.19, "learning_rate": 1.9032229889073283e-05, "loss": 0.9175, "step": 70910 }, { "epoch": 6.19, "learning_rate": 1.9027862695431916e-05, "loss": 0.9153, "step": 70920 }, { "epoch": 6.2, "learning_rate": 1.902349550179055e-05, "loss": 0.9063, "step": 70930 }, { "epoch": 6.2, "learning_rate": 1.9019128308149182e-05, "loss": 0.8902, "step": 70940 }, { "epoch": 6.2, "learning_rate": 1.901476111450782e-05, "loss": 0.8754, "step": 70950 }, { "epoch": 6.2, "learning_rate": 1.9010393920866452e-05, "loss": 0.8807, "step": 70960 }, { "epoch": 6.2, "learning_rate": 1.9006026727225085e-05, "loss": 0.9675, "step": 70970 }, { "epoch": 6.2, "learning_rate": 1.9001659533583722e-05, "loss": 0.8957, "step": 70980 }, { "epoch": 6.2, "learning_rate": 1.8997292339942355e-05, "loss": 0.8237, "step": 70990 }, { "epoch": 6.2, "learning_rate": 1.899292514630099e-05, "loss": 0.914, "step": 71000 }, { "epoch": 6.2, "learning_rate": 1.898855795265962e-05, "loss": 0.8065, "step": 71010 }, { "epoch": 6.2, "learning_rate": 1.8984190759018255e-05, "loss": 0.9236, "step": 71020 }, { "epoch": 6.2, "learning_rate": 1.8979823565376888e-05, "loss": 0.8438, "step": 71030 }, { "epoch": 6.2, "learning_rate": 1.897545637173552e-05, "loss": 1.0532, "step": 71040 }, { "epoch": 6.21, "learning_rate": 1.8971089178094158e-05, "loss": 0.8273, "step": 71050 }, { "epoch": 6.21, "learning_rate": 1.8966721984452794e-05, "loss": 0.827, "step": 71060 }, { "epoch": 6.21, "learning_rate": 1.8962354790811428e-05, "loss": 0.7816, "step": 71070 }, { "epoch": 6.21, "learning_rate": 1.895798759717006e-05, "loss": 0.7833, "step": 71080 }, { "epoch": 6.21, "learning_rate": 1.8953620403528694e-05, "loss": 0.8946, "step": 71090 }, { "epoch": 6.21, "learning_rate": 1.8949253209887327e-05, "loss": 0.946, "step": 71100 }, { "epoch": 6.21, "learning_rate": 1.894488601624596e-05, "loss": 0.8813, "step": 71110 }, { "epoch": 6.21, "learning_rate": 1.8940518822604594e-05, "loss": 0.9423, "step": 71120 }, { "epoch": 6.21, "learning_rate": 1.8936151628963227e-05, "loss": 0.7779, "step": 71130 }, { "epoch": 6.21, "learning_rate": 1.8931784435321863e-05, "loss": 0.8715, "step": 71140 }, { "epoch": 6.21, "learning_rate": 1.8927417241680497e-05, "loss": 0.8557, "step": 71150 }, { "epoch": 6.22, "learning_rate": 1.8923050048039133e-05, "loss": 0.785, "step": 71160 }, { "epoch": 6.22, "learning_rate": 1.8918682854397766e-05, "loss": 0.7611, "step": 71170 }, { "epoch": 6.22, "learning_rate": 1.89143156607564e-05, "loss": 0.8856, "step": 71180 }, { "epoch": 6.22, "learning_rate": 1.8909948467115033e-05, "loss": 0.7014, "step": 71190 }, { "epoch": 6.22, "learning_rate": 1.8905581273473666e-05, "loss": 0.7842, "step": 71200 }, { "epoch": 6.22, "learning_rate": 1.89012140798323e-05, "loss": 0.8164, "step": 71210 }, { "epoch": 6.22, "learning_rate": 1.8896846886190936e-05, "loss": 0.8535, "step": 71220 }, { "epoch": 6.22, "learning_rate": 1.889247969254957e-05, "loss": 0.8518, "step": 71230 }, { "epoch": 6.22, "learning_rate": 1.8888112498908202e-05, "loss": 0.9796, "step": 71240 }, { "epoch": 6.22, "learning_rate": 1.8883745305266835e-05, "loss": 0.8905, "step": 71250 }, { "epoch": 6.22, "learning_rate": 1.8879378111625472e-05, "loss": 0.809, "step": 71260 }, { "epoch": 6.22, "learning_rate": 1.8875010917984105e-05, "loss": 0.7956, "step": 71270 }, { "epoch": 6.23, "learning_rate": 1.887064372434274e-05, "loss": 0.9985, "step": 71280 }, { "epoch": 6.23, "learning_rate": 1.886627653070137e-05, "loss": 0.9149, "step": 71290 }, { "epoch": 6.23, "learning_rate": 1.8861909337060008e-05, "loss": 0.8343, "step": 71300 }, { "epoch": 6.23, "learning_rate": 1.885754214341864e-05, "loss": 0.8309, "step": 71310 }, { "epoch": 6.23, "learning_rate": 1.8853174949777275e-05, "loss": 0.9732, "step": 71320 }, { "epoch": 6.23, "learning_rate": 1.8848807756135908e-05, "loss": 0.9725, "step": 71330 }, { "epoch": 6.23, "learning_rate": 1.884444056249454e-05, "loss": 0.8291, "step": 71340 }, { "epoch": 6.23, "learning_rate": 1.8840073368853174e-05, "loss": 0.8985, "step": 71350 }, { "epoch": 6.23, "learning_rate": 1.883570617521181e-05, "loss": 0.8168, "step": 71360 }, { "epoch": 6.23, "learning_rate": 1.8831338981570444e-05, "loss": 0.8872, "step": 71370 }, { "epoch": 6.23, "learning_rate": 1.8826971787929077e-05, "loss": 1.0646, "step": 71380 }, { "epoch": 6.24, "learning_rate": 1.8822604594287714e-05, "loss": 0.8264, "step": 71390 }, { "epoch": 6.24, "learning_rate": 1.8818237400646347e-05, "loss": 0.8506, "step": 71400 }, { "epoch": 6.24, "learning_rate": 1.881387020700498e-05, "loss": 0.9673, "step": 71410 }, { "epoch": 6.24, "learning_rate": 1.8809503013363613e-05, "loss": 0.9954, "step": 71420 }, { "epoch": 6.24, "learning_rate": 1.8805135819722247e-05, "loss": 0.9672, "step": 71430 }, { "epoch": 6.24, "learning_rate": 1.880076862608088e-05, "loss": 0.9988, "step": 71440 }, { "epoch": 6.24, "learning_rate": 1.8796401432439513e-05, "loss": 0.8433, "step": 71450 }, { "epoch": 6.24, "learning_rate": 1.879203423879815e-05, "loss": 1.0717, "step": 71460 }, { "epoch": 6.24, "learning_rate": 1.8787667045156783e-05, "loss": 0.8962, "step": 71470 }, { "epoch": 6.24, "learning_rate": 1.878329985151542e-05, "loss": 0.8871, "step": 71480 }, { "epoch": 6.24, "learning_rate": 1.8778932657874053e-05, "loss": 1.0209, "step": 71490 }, { "epoch": 6.25, "learning_rate": 1.8774565464232686e-05, "loss": 0.8023, "step": 71500 }, { "epoch": 6.25, "learning_rate": 1.877019827059132e-05, "loss": 0.9282, "step": 71510 }, { "epoch": 6.25, "learning_rate": 1.8765831076949952e-05, "loss": 0.8532, "step": 71520 }, { "epoch": 6.25, "learning_rate": 1.8761463883308585e-05, "loss": 1.0363, "step": 71530 }, { "epoch": 6.25, "learning_rate": 1.875709668966722e-05, "loss": 0.8247, "step": 71540 }, { "epoch": 6.25, "learning_rate": 1.8752729496025855e-05, "loss": 0.8062, "step": 71550 }, { "epoch": 6.25, "learning_rate": 1.874836230238449e-05, "loss": 0.9113, "step": 71560 }, { "epoch": 6.25, "learning_rate": 1.874399510874312e-05, "loss": 0.8332, "step": 71570 }, { "epoch": 6.25, "learning_rate": 1.8739627915101758e-05, "loss": 0.953, "step": 71580 }, { "epoch": 6.25, "learning_rate": 1.873526072146039e-05, "loss": 0.9013, "step": 71590 }, { "epoch": 6.25, "learning_rate": 1.8730893527819025e-05, "loss": 1.0426, "step": 71600 }, { "epoch": 6.25, "learning_rate": 1.8726526334177658e-05, "loss": 0.8175, "step": 71610 }, { "epoch": 6.26, "learning_rate": 1.872215914053629e-05, "loss": 0.8195, "step": 71620 }, { "epoch": 6.26, "learning_rate": 1.8717791946894928e-05, "loss": 0.8412, "step": 71630 }, { "epoch": 6.26, "learning_rate": 1.871342475325356e-05, "loss": 0.8625, "step": 71640 }, { "epoch": 6.26, "learning_rate": 1.8709057559612194e-05, "loss": 0.7267, "step": 71650 }, { "epoch": 6.26, "learning_rate": 1.8704690365970827e-05, "loss": 0.8372, "step": 71660 }, { "epoch": 6.26, "learning_rate": 1.870032317232946e-05, "loss": 0.8501, "step": 71670 }, { "epoch": 6.26, "learning_rate": 1.8695955978688097e-05, "loss": 0.8735, "step": 71680 }, { "epoch": 6.26, "learning_rate": 1.869158878504673e-05, "loss": 0.8881, "step": 71690 }, { "epoch": 6.26, "learning_rate": 1.8687221591405363e-05, "loss": 0.9429, "step": 71700 }, { "epoch": 6.26, "learning_rate": 1.8682854397763997e-05, "loss": 0.94, "step": 71710 }, { "epoch": 6.26, "learning_rate": 1.8678487204122633e-05, "loss": 0.8265, "step": 71720 }, { "epoch": 6.27, "learning_rate": 1.8674120010481266e-05, "loss": 0.809, "step": 71730 }, { "epoch": 6.27, "learning_rate": 1.86697528168399e-05, "loss": 0.753, "step": 71740 }, { "epoch": 6.27, "learning_rate": 1.8665385623198533e-05, "loss": 0.8257, "step": 71750 }, { "epoch": 6.27, "learning_rate": 1.8661018429557166e-05, "loss": 0.8624, "step": 71760 }, { "epoch": 6.27, "learning_rate": 1.86566512359158e-05, "loss": 0.8275, "step": 71770 }, { "epoch": 6.27, "learning_rate": 1.8652284042274436e-05, "loss": 0.7989, "step": 71780 }, { "epoch": 6.27, "learning_rate": 1.864791684863307e-05, "loss": 0.9155, "step": 71790 }, { "epoch": 6.27, "learning_rate": 1.8643549654991706e-05, "loss": 0.8904, "step": 71800 }, { "epoch": 6.27, "learning_rate": 1.863918246135034e-05, "loss": 0.7974, "step": 71810 }, { "epoch": 6.27, "learning_rate": 1.8634815267708972e-05, "loss": 0.8752, "step": 71820 }, { "epoch": 6.27, "learning_rate": 1.8630448074067605e-05, "loss": 0.8331, "step": 71830 }, { "epoch": 6.27, "learning_rate": 1.862608088042624e-05, "loss": 0.9015, "step": 71840 }, { "epoch": 6.28, "learning_rate": 1.862171368678487e-05, "loss": 0.8611, "step": 71850 }, { "epoch": 6.28, "learning_rate": 1.8617346493143505e-05, "loss": 0.8904, "step": 71860 }, { "epoch": 6.28, "learning_rate": 1.8612979299502138e-05, "loss": 1.0525, "step": 71870 }, { "epoch": 6.28, "learning_rate": 1.8608612105860775e-05, "loss": 0.9196, "step": 71880 }, { "epoch": 6.28, "learning_rate": 1.860424491221941e-05, "loss": 0.9356, "step": 71890 }, { "epoch": 6.28, "learning_rate": 1.8599877718578044e-05, "loss": 0.9729, "step": 71900 }, { "epoch": 6.28, "learning_rate": 1.8595510524936678e-05, "loss": 0.9252, "step": 71910 }, { "epoch": 6.28, "learning_rate": 1.859114333129531e-05, "loss": 0.779, "step": 71920 }, { "epoch": 6.28, "learning_rate": 1.8586776137653944e-05, "loss": 0.8593, "step": 71930 }, { "epoch": 6.28, "learning_rate": 1.8582408944012577e-05, "loss": 0.9652, "step": 71940 }, { "epoch": 6.28, "learning_rate": 1.857804175037121e-05, "loss": 0.9166, "step": 71950 }, { "epoch": 6.29, "learning_rate": 1.8573674556729847e-05, "loss": 0.862, "step": 71960 }, { "epoch": 6.29, "learning_rate": 1.856930736308848e-05, "loss": 0.9203, "step": 71970 }, { "epoch": 6.29, "learning_rate": 1.8564940169447113e-05, "loss": 0.7862, "step": 71980 }, { "epoch": 6.29, "learning_rate": 1.856057297580575e-05, "loss": 0.8305, "step": 71990 }, { "epoch": 6.29, "learning_rate": 1.8556205782164383e-05, "loss": 0.9233, "step": 72000 }, { "epoch": 6.29, "learning_rate": 1.8551838588523016e-05, "loss": 0.8982, "step": 72010 }, { "epoch": 6.29, "learning_rate": 1.854747139488165e-05, "loss": 0.8632, "step": 72020 }, { "epoch": 6.29, "learning_rate": 1.8543104201240283e-05, "loss": 0.9604, "step": 72030 }, { "epoch": 6.29, "learning_rate": 1.853873700759892e-05, "loss": 1.0333, "step": 72040 }, { "epoch": 6.29, "learning_rate": 1.8534369813957553e-05, "loss": 0.9812, "step": 72050 }, { "epoch": 6.29, "learning_rate": 1.8530002620316186e-05, "loss": 0.8037, "step": 72060 }, { "epoch": 6.29, "learning_rate": 1.852563542667482e-05, "loss": 0.8472, "step": 72070 }, { "epoch": 6.3, "learning_rate": 1.8521268233033452e-05, "loss": 0.8167, "step": 72080 }, { "epoch": 6.3, "learning_rate": 1.851690103939209e-05, "loss": 0.8576, "step": 72090 }, { "epoch": 6.3, "learning_rate": 1.8512533845750722e-05, "loss": 0.764, "step": 72100 }, { "epoch": 6.3, "learning_rate": 1.8508166652109355e-05, "loss": 0.9031, "step": 72110 }, { "epoch": 6.3, "learning_rate": 1.850379945846799e-05, "loss": 0.8785, "step": 72120 }, { "epoch": 6.3, "learning_rate": 1.8499432264826625e-05, "loss": 0.9295, "step": 72130 }, { "epoch": 6.3, "learning_rate": 1.8495065071185258e-05, "loss": 0.7861, "step": 72140 }, { "epoch": 6.3, "learning_rate": 1.849069787754389e-05, "loss": 0.8132, "step": 72150 }, { "epoch": 6.3, "learning_rate": 1.8486330683902525e-05, "loss": 1.0356, "step": 72160 }, { "epoch": 6.3, "learning_rate": 1.8481963490261158e-05, "loss": 0.9712, "step": 72170 }, { "epoch": 6.3, "learning_rate": 1.847759629661979e-05, "loss": 0.729, "step": 72180 }, { "epoch": 6.31, "learning_rate": 1.8473229102978428e-05, "loss": 0.9187, "step": 72190 }, { "epoch": 6.31, "learning_rate": 1.846886190933706e-05, "loss": 0.8559, "step": 72200 }, { "epoch": 6.31, "learning_rate": 1.8464494715695697e-05, "loss": 0.8775, "step": 72210 }, { "epoch": 6.31, "learning_rate": 1.846012752205433e-05, "loss": 0.7853, "step": 72220 }, { "epoch": 6.31, "learning_rate": 1.8455760328412964e-05, "loss": 0.968, "step": 72230 }, { "epoch": 6.31, "learning_rate": 1.8451393134771597e-05, "loss": 0.9758, "step": 72240 }, { "epoch": 6.31, "learning_rate": 1.844702594113023e-05, "loss": 0.8382, "step": 72250 }, { "epoch": 6.31, "learning_rate": 1.8442658747488863e-05, "loss": 0.908, "step": 72260 }, { "epoch": 6.31, "learning_rate": 1.8438291553847497e-05, "loss": 0.929, "step": 72270 }, { "epoch": 6.31, "learning_rate": 1.843392436020613e-05, "loss": 0.9459, "step": 72280 }, { "epoch": 6.31, "learning_rate": 1.8429557166564766e-05, "loss": 0.8529, "step": 72290 }, { "epoch": 6.31, "learning_rate": 1.84251899729234e-05, "loss": 0.8481, "step": 72300 }, { "epoch": 6.32, "learning_rate": 1.8420822779282036e-05, "loss": 0.7976, "step": 72310 }, { "epoch": 6.32, "learning_rate": 1.841645558564067e-05, "loss": 0.9474, "step": 72320 }, { "epoch": 6.32, "learning_rate": 1.8412088391999303e-05, "loss": 0.899, "step": 72330 }, { "epoch": 6.32, "learning_rate": 1.8407721198357936e-05, "loss": 0.8752, "step": 72340 }, { "epoch": 6.32, "learning_rate": 1.840335400471657e-05, "loss": 0.9167, "step": 72350 }, { "epoch": 6.32, "learning_rate": 1.8398986811075202e-05, "loss": 0.8355, "step": 72360 }, { "epoch": 6.32, "learning_rate": 1.839461961743384e-05, "loss": 1.0397, "step": 72370 }, { "epoch": 6.32, "learning_rate": 1.8390252423792472e-05, "loss": 0.9264, "step": 72380 }, { "epoch": 6.32, "learning_rate": 1.8385885230151105e-05, "loss": 0.823, "step": 72390 }, { "epoch": 6.32, "learning_rate": 1.838151803650974e-05, "loss": 0.8842, "step": 72400 }, { "epoch": 6.32, "learning_rate": 1.8377150842868375e-05, "loss": 0.9359, "step": 72410 }, { "epoch": 6.33, "learning_rate": 1.8372783649227008e-05, "loss": 0.977, "step": 72420 }, { "epoch": 6.33, "learning_rate": 1.836841645558564e-05, "loss": 0.8938, "step": 72430 }, { "epoch": 6.33, "learning_rate": 1.8364049261944275e-05, "loss": 0.8165, "step": 72440 }, { "epoch": 6.33, "learning_rate": 1.8359682068302908e-05, "loss": 0.8557, "step": 72450 }, { "epoch": 6.33, "learning_rate": 1.8355314874661544e-05, "loss": 0.8756, "step": 72460 }, { "epoch": 6.33, "learning_rate": 1.8350947681020178e-05, "loss": 0.9095, "step": 72470 }, { "epoch": 6.33, "learning_rate": 1.834658048737881e-05, "loss": 0.8448, "step": 72480 }, { "epoch": 6.33, "learning_rate": 1.8342213293737444e-05, "loss": 0.9614, "step": 72490 }, { "epoch": 6.33, "learning_rate": 1.8337846100096077e-05, "loss": 0.9148, "step": 72500 }, { "epoch": 6.33, "learning_rate": 1.8333478906454714e-05, "loss": 0.9234, "step": 72510 }, { "epoch": 6.33, "learning_rate": 1.8329111712813347e-05, "loss": 0.8872, "step": 72520 }, { "epoch": 6.34, "learning_rate": 1.832474451917198e-05, "loss": 0.8646, "step": 72530 }, { "epoch": 6.34, "learning_rate": 1.8320377325530617e-05, "loss": 0.8369, "step": 72540 }, { "epoch": 6.34, "learning_rate": 1.831601013188925e-05, "loss": 0.8291, "step": 72550 }, { "epoch": 6.34, "learning_rate": 1.8311642938247883e-05, "loss": 0.788, "step": 72560 }, { "epoch": 6.34, "learning_rate": 1.8307275744606516e-05, "loss": 0.8511, "step": 72570 }, { "epoch": 6.34, "learning_rate": 1.830290855096515e-05, "loss": 0.7602, "step": 72580 }, { "epoch": 6.34, "learning_rate": 1.8298541357323783e-05, "loss": 0.8554, "step": 72590 }, { "epoch": 6.34, "learning_rate": 1.8294174163682416e-05, "loss": 0.8953, "step": 72600 }, { "epoch": 6.34, "learning_rate": 1.8289806970041053e-05, "loss": 0.9803, "step": 72610 }, { "epoch": 6.34, "learning_rate": 1.828543977639969e-05, "loss": 0.9335, "step": 72620 }, { "epoch": 6.34, "learning_rate": 1.8281072582758322e-05, "loss": 0.8681, "step": 72630 }, { "epoch": 6.34, "learning_rate": 1.8276705389116956e-05, "loss": 0.9368, "step": 72640 }, { "epoch": 6.35, "learning_rate": 1.827233819547559e-05, "loss": 0.7977, "step": 72650 }, { "epoch": 6.35, "learning_rate": 1.8267971001834222e-05, "loss": 0.9461, "step": 72660 }, { "epoch": 6.35, "learning_rate": 1.8263603808192855e-05, "loss": 0.814, "step": 72670 }, { "epoch": 6.35, "learning_rate": 1.825923661455149e-05, "loss": 0.8761, "step": 72680 }, { "epoch": 6.35, "learning_rate": 1.825486942091012e-05, "loss": 0.8702, "step": 72690 }, { "epoch": 6.35, "learning_rate": 1.8250502227268758e-05, "loss": 0.945, "step": 72700 }, { "epoch": 6.35, "learning_rate": 1.824613503362739e-05, "loss": 0.8462, "step": 72710 }, { "epoch": 6.35, "learning_rate": 1.8241767839986028e-05, "loss": 0.9154, "step": 72720 }, { "epoch": 6.35, "learning_rate": 1.823740064634466e-05, "loss": 0.7897, "step": 72730 }, { "epoch": 6.35, "learning_rate": 1.8233033452703294e-05, "loss": 0.8775, "step": 72740 }, { "epoch": 6.35, "learning_rate": 1.8228666259061928e-05, "loss": 0.9439, "step": 72750 }, { "epoch": 6.36, "learning_rate": 1.822429906542056e-05, "loss": 0.9207, "step": 72760 }, { "epoch": 6.36, "learning_rate": 1.8219931871779194e-05, "loss": 0.8939, "step": 72770 }, { "epoch": 6.36, "learning_rate": 1.821556467813783e-05, "loss": 0.8118, "step": 72780 }, { "epoch": 6.36, "learning_rate": 1.8211197484496464e-05, "loss": 0.8528, "step": 72790 }, { "epoch": 6.36, "learning_rate": 1.8206830290855097e-05, "loss": 1.0453, "step": 72800 }, { "epoch": 6.36, "learning_rate": 1.820246309721373e-05, "loss": 0.877, "step": 72810 }, { "epoch": 6.36, "learning_rate": 1.8198095903572367e-05, "loss": 0.881, "step": 72820 }, { "epoch": 6.36, "learning_rate": 1.8193728709931e-05, "loss": 0.8764, "step": 72830 }, { "epoch": 6.36, "learning_rate": 1.8189361516289633e-05, "loss": 0.8874, "step": 72840 }, { "epoch": 6.36, "learning_rate": 1.8184994322648266e-05, "loss": 0.8844, "step": 72850 }, { "epoch": 6.36, "learning_rate": 1.81806271290069e-05, "loss": 0.8962, "step": 72860 }, { "epoch": 6.36, "learning_rate": 1.8176259935365536e-05, "loss": 0.8048, "step": 72870 }, { "epoch": 6.37, "learning_rate": 1.817189274172417e-05, "loss": 0.7615, "step": 72880 }, { "epoch": 6.37, "learning_rate": 1.8167525548082803e-05, "loss": 0.8452, "step": 72890 }, { "epoch": 6.37, "learning_rate": 1.8163158354441436e-05, "loss": 0.7971, "step": 72900 }, { "epoch": 6.37, "learning_rate": 1.815879116080007e-05, "loss": 0.9708, "step": 72910 }, { "epoch": 6.37, "learning_rate": 1.8154423967158706e-05, "loss": 0.8657, "step": 72920 }, { "epoch": 6.37, "learning_rate": 1.815005677351734e-05, "loss": 0.8003, "step": 72930 }, { "epoch": 6.37, "learning_rate": 1.8145689579875972e-05, "loss": 0.8514, "step": 72940 }, { "epoch": 6.37, "learning_rate": 1.814132238623461e-05, "loss": 0.8204, "step": 72950 }, { "epoch": 6.37, "learning_rate": 1.8136955192593242e-05, "loss": 1.0244, "step": 72960 }, { "epoch": 6.37, "learning_rate": 1.8132587998951875e-05, "loss": 0.9016, "step": 72970 }, { "epoch": 6.37, "learning_rate": 1.8128220805310508e-05, "loss": 0.8323, "step": 72980 }, { "epoch": 6.38, "learning_rate": 1.812385361166914e-05, "loss": 0.783, "step": 72990 }, { "epoch": 6.38, "learning_rate": 1.8119486418027775e-05, "loss": 0.7646, "step": 73000 }, { "epoch": 6.38, "learning_rate": 1.8115119224386408e-05, "loss": 0.7498, "step": 73010 }, { "epoch": 6.38, "learning_rate": 1.8110752030745044e-05, "loss": 0.8537, "step": 73020 }, { "epoch": 6.38, "learning_rate": 1.8106384837103678e-05, "loss": 1.1, "step": 73030 }, { "epoch": 6.38, "learning_rate": 1.8102017643462314e-05, "loss": 0.8838, "step": 73040 }, { "epoch": 6.38, "learning_rate": 1.8097650449820947e-05, "loss": 0.843, "step": 73050 }, { "epoch": 6.38, "learning_rate": 1.809328325617958e-05, "loss": 0.8624, "step": 73060 }, { "epoch": 6.38, "learning_rate": 1.8088916062538214e-05, "loss": 0.9567, "step": 73070 }, { "epoch": 6.38, "learning_rate": 1.8084548868896847e-05, "loss": 0.8534, "step": 73080 }, { "epoch": 6.38, "learning_rate": 1.808018167525548e-05, "loss": 0.8378, "step": 73090 }, { "epoch": 6.38, "learning_rate": 1.8075814481614113e-05, "loss": 0.8514, "step": 73100 }, { "epoch": 6.39, "learning_rate": 1.807144728797275e-05, "loss": 0.8316, "step": 73110 }, { "epoch": 6.39, "learning_rate": 1.8067080094331383e-05, "loss": 0.9003, "step": 73120 }, { "epoch": 6.39, "learning_rate": 1.8062712900690016e-05, "loss": 0.8278, "step": 73130 }, { "epoch": 6.39, "learning_rate": 1.8058345707048653e-05, "loss": 0.8687, "step": 73140 }, { "epoch": 6.39, "learning_rate": 1.8053978513407286e-05, "loss": 0.7913, "step": 73150 }, { "epoch": 6.39, "learning_rate": 1.804961131976592e-05, "loss": 0.89, "step": 73160 }, { "epoch": 6.39, "learning_rate": 1.8045244126124553e-05, "loss": 0.8576, "step": 73170 }, { "epoch": 6.39, "learning_rate": 1.8040876932483186e-05, "loss": 0.9113, "step": 73180 }, { "epoch": 6.39, "learning_rate": 1.8036509738841822e-05, "loss": 0.9816, "step": 73190 }, { "epoch": 6.39, "learning_rate": 1.8032142545200456e-05, "loss": 0.9111, "step": 73200 }, { "epoch": 6.39, "learning_rate": 1.802777535155909e-05, "loss": 0.8329, "step": 73210 }, { "epoch": 6.4, "learning_rate": 1.8023408157917722e-05, "loss": 0.8918, "step": 73220 }, { "epoch": 6.4, "learning_rate": 1.8019040964276355e-05, "loss": 0.8055, "step": 73230 }, { "epoch": 6.4, "learning_rate": 1.8014673770634992e-05, "loss": 0.7648, "step": 73240 }, { "epoch": 6.4, "learning_rate": 1.8010306576993625e-05, "loss": 1.0078, "step": 73250 }, { "epoch": 6.4, "learning_rate": 1.8005939383352258e-05, "loss": 0.7405, "step": 73260 }, { "epoch": 6.4, "learning_rate": 1.800157218971089e-05, "loss": 0.8657, "step": 73270 }, { "epoch": 6.4, "learning_rate": 1.7997204996069528e-05, "loss": 0.896, "step": 73280 }, { "epoch": 6.4, "learning_rate": 1.799283780242816e-05, "loss": 0.9755, "step": 73290 }, { "epoch": 6.4, "learning_rate": 1.7988470608786794e-05, "loss": 0.7952, "step": 73300 }, { "epoch": 6.4, "learning_rate": 1.7984103415145428e-05, "loss": 0.8558, "step": 73310 }, { "epoch": 6.4, "learning_rate": 1.797973622150406e-05, "loss": 0.9977, "step": 73320 }, { "epoch": 6.4, "learning_rate": 1.7975369027862697e-05, "loss": 1.0686, "step": 73330 }, { "epoch": 6.41, "learning_rate": 1.797100183422133e-05, "loss": 0.9011, "step": 73340 }, { "epoch": 6.41, "learning_rate": 1.7966634640579964e-05, "loss": 0.7721, "step": 73350 }, { "epoch": 6.41, "learning_rate": 1.79622674469386e-05, "loss": 0.7239, "step": 73360 }, { "epoch": 6.41, "learning_rate": 1.7957900253297234e-05, "loss": 0.8572, "step": 73370 }, { "epoch": 6.41, "learning_rate": 1.7953533059655867e-05, "loss": 1.0076, "step": 73380 }, { "epoch": 6.41, "learning_rate": 1.79491658660145e-05, "loss": 0.9593, "step": 73390 }, { "epoch": 6.41, "learning_rate": 1.7944798672373133e-05, "loss": 0.8696, "step": 73400 }, { "epoch": 6.41, "learning_rate": 1.7940431478731766e-05, "loss": 0.9169, "step": 73410 }, { "epoch": 6.41, "learning_rate": 1.79360642850904e-05, "loss": 0.8894, "step": 73420 }, { "epoch": 6.41, "learning_rate": 1.7931697091449036e-05, "loss": 0.9452, "step": 73430 }, { "epoch": 6.41, "learning_rate": 1.792732989780767e-05, "loss": 0.9116, "step": 73440 }, { "epoch": 6.42, "learning_rate": 1.7922962704166306e-05, "loss": 0.9398, "step": 73450 }, { "epoch": 6.42, "learning_rate": 1.791859551052494e-05, "loss": 0.8308, "step": 73460 }, { "epoch": 6.42, "learning_rate": 1.7914228316883572e-05, "loss": 0.828, "step": 73470 }, { "epoch": 6.42, "learning_rate": 1.7909861123242206e-05, "loss": 0.854, "step": 73480 }, { "epoch": 6.42, "learning_rate": 1.790549392960084e-05, "loss": 0.91, "step": 73490 }, { "epoch": 6.42, "learning_rate": 1.7901126735959472e-05, "loss": 0.8678, "step": 73500 }, { "epoch": 6.42, "learning_rate": 1.7896759542318105e-05, "loss": 0.8256, "step": 73510 }, { "epoch": 6.42, "learning_rate": 1.7892392348676742e-05, "loss": 0.7885, "step": 73520 }, { "epoch": 6.42, "learning_rate": 1.7888025155035375e-05, "loss": 0.9555, "step": 73530 }, { "epoch": 6.42, "learning_rate": 1.7883657961394008e-05, "loss": 0.9513, "step": 73540 }, { "epoch": 6.42, "learning_rate": 1.7879290767752645e-05, "loss": 0.9029, "step": 73550 }, { "epoch": 6.43, "learning_rate": 1.7874923574111278e-05, "loss": 0.8811, "step": 73560 }, { "epoch": 6.43, "learning_rate": 1.787055638046991e-05, "loss": 0.9734, "step": 73570 }, { "epoch": 6.43, "learning_rate": 1.7866189186828544e-05, "loss": 0.8919, "step": 73580 }, { "epoch": 6.43, "learning_rate": 1.7861821993187178e-05, "loss": 1.0024, "step": 73590 }, { "epoch": 6.43, "learning_rate": 1.785745479954581e-05, "loss": 0.8615, "step": 73600 }, { "epoch": 6.43, "learning_rate": 1.7853087605904447e-05, "loss": 0.8416, "step": 73610 }, { "epoch": 6.43, "learning_rate": 1.784872041226308e-05, "loss": 0.9342, "step": 73620 }, { "epoch": 6.43, "learning_rate": 1.7844353218621714e-05, "loss": 0.9316, "step": 73630 }, { "epoch": 6.43, "learning_rate": 1.7839986024980347e-05, "loss": 0.7744, "step": 73640 }, { "epoch": 6.43, "learning_rate": 1.7835618831338984e-05, "loss": 0.9396, "step": 73650 }, { "epoch": 6.43, "learning_rate": 1.7831251637697617e-05, "loss": 0.9626, "step": 73660 }, { "epoch": 6.43, "learning_rate": 1.782688444405625e-05, "loss": 0.8825, "step": 73670 }, { "epoch": 6.44, "learning_rate": 1.7822517250414883e-05, "loss": 0.8868, "step": 73680 }, { "epoch": 6.44, "learning_rate": 1.781815005677352e-05, "loss": 0.8375, "step": 73690 }, { "epoch": 6.44, "learning_rate": 1.7813782863132153e-05, "loss": 0.8983, "step": 73700 }, { "epoch": 6.44, "learning_rate": 1.7809415669490786e-05, "loss": 0.8813, "step": 73710 }, { "epoch": 6.44, "learning_rate": 1.780504847584942e-05, "loss": 0.8971, "step": 73720 }, { "epoch": 6.44, "learning_rate": 1.7800681282208053e-05, "loss": 0.9454, "step": 73730 }, { "epoch": 6.44, "learning_rate": 1.7796314088566686e-05, "loss": 0.7878, "step": 73740 }, { "epoch": 6.44, "learning_rate": 1.7791946894925322e-05, "loss": 0.9798, "step": 73750 }, { "epoch": 6.44, "learning_rate": 1.7787579701283956e-05, "loss": 0.9063, "step": 73760 }, { "epoch": 6.44, "learning_rate": 1.7783212507642592e-05, "loss": 0.8441, "step": 73770 }, { "epoch": 6.44, "learning_rate": 1.7778845314001225e-05, "loss": 0.7692, "step": 73780 }, { "epoch": 6.45, "learning_rate": 1.777447812035986e-05, "loss": 0.8489, "step": 73790 }, { "epoch": 6.45, "learning_rate": 1.7770110926718492e-05, "loss": 0.8931, "step": 73800 }, { "epoch": 6.45, "learning_rate": 1.7765743733077125e-05, "loss": 0.8762, "step": 73810 }, { "epoch": 6.45, "learning_rate": 1.7761376539435758e-05, "loss": 0.931, "step": 73820 }, { "epoch": 6.45, "learning_rate": 1.775700934579439e-05, "loss": 0.869, "step": 73830 }, { "epoch": 6.45, "learning_rate": 1.7752642152153025e-05, "loss": 0.9606, "step": 73840 }, { "epoch": 6.45, "learning_rate": 1.774827495851166e-05, "loss": 0.9048, "step": 73850 }, { "epoch": 6.45, "learning_rate": 1.7743907764870298e-05, "loss": 0.7423, "step": 73860 }, { "epoch": 6.45, "learning_rate": 1.773954057122893e-05, "loss": 0.8968, "step": 73870 }, { "epoch": 6.45, "learning_rate": 1.7735173377587564e-05, "loss": 0.8955, "step": 73880 }, { "epoch": 6.45, "learning_rate": 1.7730806183946197e-05, "loss": 0.8028, "step": 73890 }, { "epoch": 6.45, "learning_rate": 1.772643899030483e-05, "loss": 0.8119, "step": 73900 }, { "epoch": 6.46, "learning_rate": 1.7722071796663464e-05, "loss": 0.8178, "step": 73910 }, { "epoch": 6.46, "learning_rate": 1.7717704603022097e-05, "loss": 0.8975, "step": 73920 }, { "epoch": 6.46, "learning_rate": 1.7713337409380734e-05, "loss": 0.7846, "step": 73930 }, { "epoch": 6.46, "learning_rate": 1.7708970215739367e-05, "loss": 0.8905, "step": 73940 }, { "epoch": 6.46, "learning_rate": 1.7704603022098e-05, "loss": 0.9009, "step": 73950 }, { "epoch": 6.46, "learning_rate": 1.7700235828456637e-05, "loss": 1.1102, "step": 73960 }, { "epoch": 6.46, "learning_rate": 1.769586863481527e-05, "loss": 0.8788, "step": 73970 }, { "epoch": 6.46, "learning_rate": 1.7691501441173903e-05, "loss": 0.8292, "step": 73980 }, { "epoch": 6.46, "learning_rate": 1.7687134247532536e-05, "loss": 0.9189, "step": 73990 }, { "epoch": 6.46, "learning_rate": 1.768276705389117e-05, "loss": 0.8096, "step": 74000 }, { "epoch": 6.46, "learning_rate": 1.7678399860249803e-05, "loss": 0.771, "step": 74010 }, { "epoch": 6.47, "learning_rate": 1.767403266660844e-05, "loss": 0.6984, "step": 74020 }, { "epoch": 6.47, "learning_rate": 1.7669665472967072e-05, "loss": 0.9066, "step": 74030 }, { "epoch": 6.47, "learning_rate": 1.7665298279325706e-05, "loss": 0.9527, "step": 74040 }, { "epoch": 6.47, "learning_rate": 1.766093108568434e-05, "loss": 0.884, "step": 74050 }, { "epoch": 6.47, "learning_rate": 1.7656563892042975e-05, "loss": 0.8839, "step": 74060 }, { "epoch": 6.47, "learning_rate": 1.765219669840161e-05, "loss": 0.8211, "step": 74070 }, { "epoch": 6.47, "learning_rate": 1.7647829504760242e-05, "loss": 0.8189, "step": 74080 }, { "epoch": 6.47, "learning_rate": 1.7643462311118875e-05, "loss": 0.7521, "step": 74090 }, { "epoch": 6.47, "learning_rate": 1.763909511747751e-05, "loss": 0.8887, "step": 74100 }, { "epoch": 6.47, "learning_rate": 1.7634727923836145e-05, "loss": 0.9535, "step": 74110 }, { "epoch": 6.47, "learning_rate": 1.7630360730194778e-05, "loss": 1.0001, "step": 74120 }, { "epoch": 6.47, "learning_rate": 1.762599353655341e-05, "loss": 0.9906, "step": 74130 }, { "epoch": 6.48, "learning_rate": 1.7621626342912044e-05, "loss": 0.9026, "step": 74140 }, { "epoch": 6.48, "learning_rate": 1.7617259149270678e-05, "loss": 0.731, "step": 74150 }, { "epoch": 6.48, "learning_rate": 1.7612891955629314e-05, "loss": 0.7775, "step": 74160 }, { "epoch": 6.48, "learning_rate": 1.7608524761987947e-05, "loss": 0.9024, "step": 74170 }, { "epoch": 6.48, "learning_rate": 1.7604157568346584e-05, "loss": 0.9135, "step": 74180 }, { "epoch": 6.48, "learning_rate": 1.7599790374705217e-05, "loss": 0.8827, "step": 74190 }, { "epoch": 6.48, "learning_rate": 1.759542318106385e-05, "loss": 0.7173, "step": 74200 }, { "epoch": 6.48, "learning_rate": 1.7591055987422484e-05, "loss": 0.9326, "step": 74210 }, { "epoch": 6.48, "learning_rate": 1.7586688793781117e-05, "loss": 0.8299, "step": 74220 }, { "epoch": 6.48, "learning_rate": 1.758232160013975e-05, "loss": 0.8681, "step": 74230 }, { "epoch": 6.48, "learning_rate": 1.7577954406498383e-05, "loss": 0.8935, "step": 74240 }, { "epoch": 6.49, "learning_rate": 1.7573587212857016e-05, "loss": 0.829, "step": 74250 }, { "epoch": 6.49, "learning_rate": 1.7569220019215653e-05, "loss": 0.8176, "step": 74260 }, { "epoch": 6.49, "learning_rate": 1.7564852825574286e-05, "loss": 0.8994, "step": 74270 }, { "epoch": 6.49, "learning_rate": 1.7560485631932923e-05, "loss": 0.9441, "step": 74280 }, { "epoch": 6.49, "learning_rate": 1.7556118438291556e-05, "loss": 0.9572, "step": 74290 }, { "epoch": 6.49, "learning_rate": 1.755175124465019e-05, "loss": 0.83, "step": 74300 }, { "epoch": 6.49, "learning_rate": 1.7547384051008822e-05, "loss": 0.9199, "step": 74310 }, { "epoch": 6.49, "learning_rate": 1.7543016857367456e-05, "loss": 0.8054, "step": 74320 }, { "epoch": 6.49, "learning_rate": 1.753864966372609e-05, "loss": 0.8224, "step": 74330 }, { "epoch": 6.49, "learning_rate": 1.7534282470084722e-05, "loss": 0.8242, "step": 74340 }, { "epoch": 6.49, "learning_rate": 1.752991527644336e-05, "loss": 0.8085, "step": 74350 }, { "epoch": 6.49, "learning_rate": 1.7525548082801992e-05, "loss": 0.8783, "step": 74360 }, { "epoch": 6.5, "learning_rate": 1.7521180889160625e-05, "loss": 0.9144, "step": 74370 }, { "epoch": 6.5, "learning_rate": 1.751681369551926e-05, "loss": 0.9739, "step": 74380 }, { "epoch": 6.5, "learning_rate": 1.7512446501877895e-05, "loss": 0.9597, "step": 74390 }, { "epoch": 6.5, "learning_rate": 1.7508079308236528e-05, "loss": 0.8224, "step": 74400 }, { "epoch": 6.5, "learning_rate": 1.750371211459516e-05, "loss": 0.9315, "step": 74410 }, { "epoch": 6.5, "learning_rate": 1.7499344920953794e-05, "loss": 0.9588, "step": 74420 }, { "epoch": 6.5, "learning_rate": 1.749497772731243e-05, "loss": 0.9594, "step": 74430 }, { "epoch": 6.5, "learning_rate": 1.7490610533671064e-05, "loss": 0.9141, "step": 74440 }, { "epoch": 6.5, "learning_rate": 1.7486243340029697e-05, "loss": 0.859, "step": 74450 }, { "epoch": 6.5, "learning_rate": 1.748187614638833e-05, "loss": 0.8619, "step": 74460 }, { "epoch": 6.5, "learning_rate": 1.7477508952746964e-05, "loss": 0.8729, "step": 74470 }, { "epoch": 6.51, "learning_rate": 1.74731417591056e-05, "loss": 0.8278, "step": 74480 }, { "epoch": 6.51, "learning_rate": 1.7468774565464234e-05, "loss": 0.831, "step": 74490 }, { "epoch": 6.51, "learning_rate": 1.7464407371822867e-05, "loss": 0.7858, "step": 74500 }, { "epoch": 6.51, "learning_rate": 1.7460040178181503e-05, "loss": 0.9425, "step": 74510 }, { "epoch": 6.51, "learning_rate": 1.7455672984540137e-05, "loss": 0.945, "step": 74520 }, { "epoch": 6.51, "learning_rate": 1.745130579089877e-05, "loss": 0.8858, "step": 74530 }, { "epoch": 6.51, "learning_rate": 1.7446938597257403e-05, "loss": 0.8734, "step": 74540 }, { "epoch": 6.51, "learning_rate": 1.7442571403616036e-05, "loss": 0.7761, "step": 74550 }, { "epoch": 6.51, "learning_rate": 1.743820420997467e-05, "loss": 0.9188, "step": 74560 }, { "epoch": 6.51, "learning_rate": 1.7433837016333303e-05, "loss": 0.8193, "step": 74570 }, { "epoch": 6.51, "learning_rate": 1.742946982269194e-05, "loss": 0.8088, "step": 74580 }, { "epoch": 6.51, "learning_rate": 1.7425102629050576e-05, "loss": 0.9215, "step": 74590 }, { "epoch": 6.52, "learning_rate": 1.742073543540921e-05, "loss": 0.8416, "step": 74600 }, { "epoch": 6.52, "learning_rate": 1.7416368241767842e-05, "loss": 0.7439, "step": 74610 }, { "epoch": 6.52, "learning_rate": 1.7412001048126475e-05, "loss": 0.761, "step": 74620 }, { "epoch": 6.52, "learning_rate": 1.740763385448511e-05, "loss": 0.8256, "step": 74630 }, { "epoch": 6.52, "learning_rate": 1.7403266660843742e-05, "loss": 0.9349, "step": 74640 }, { "epoch": 6.52, "learning_rate": 1.7398899467202375e-05, "loss": 0.8867, "step": 74650 }, { "epoch": 6.52, "learning_rate": 1.7394532273561008e-05, "loss": 0.9763, "step": 74660 }, { "epoch": 6.52, "learning_rate": 1.7390165079919645e-05, "loss": 0.9446, "step": 74670 }, { "epoch": 6.52, "learning_rate": 1.7385797886278278e-05, "loss": 0.9561, "step": 74680 }, { "epoch": 6.52, "learning_rate": 1.7381430692636915e-05, "loss": 0.936, "step": 74690 }, { "epoch": 6.52, "learning_rate": 1.7377063498995548e-05, "loss": 0.9268, "step": 74700 }, { "epoch": 6.53, "learning_rate": 1.737269630535418e-05, "loss": 0.7831, "step": 74710 }, { "epoch": 6.53, "learning_rate": 1.7368329111712814e-05, "loss": 0.8411, "step": 74720 }, { "epoch": 6.53, "learning_rate": 1.7363961918071447e-05, "loss": 0.8239, "step": 74730 }, { "epoch": 6.53, "learning_rate": 1.735959472443008e-05, "loss": 0.8891, "step": 74740 }, { "epoch": 6.53, "learning_rate": 1.7355227530788714e-05, "loss": 0.9446, "step": 74750 }, { "epoch": 6.53, "learning_rate": 1.735086033714735e-05, "loss": 0.8556, "step": 74760 }, { "epoch": 6.53, "learning_rate": 1.7346493143505984e-05, "loss": 1.0043, "step": 74770 }, { "epoch": 6.53, "learning_rate": 1.7342125949864617e-05, "loss": 0.8463, "step": 74780 }, { "epoch": 6.53, "learning_rate": 1.7337758756223253e-05, "loss": 0.7898, "step": 74790 }, { "epoch": 6.53, "learning_rate": 1.7333391562581887e-05, "loss": 0.7832, "step": 74800 }, { "epoch": 6.53, "learning_rate": 1.732902436894052e-05, "loss": 0.7703, "step": 74810 }, { "epoch": 6.54, "learning_rate": 1.7324657175299153e-05, "loss": 0.8626, "step": 74820 }, { "epoch": 6.54, "learning_rate": 1.7320289981657786e-05, "loss": 0.779, "step": 74830 }, { "epoch": 6.54, "learning_rate": 1.7315922788016423e-05, "loss": 0.8687, "step": 74840 }, { "epoch": 6.54, "learning_rate": 1.7311555594375056e-05, "loss": 0.9799, "step": 74850 }, { "epoch": 6.54, "learning_rate": 1.730718840073369e-05, "loss": 0.9162, "step": 74860 }, { "epoch": 6.54, "learning_rate": 1.7302821207092322e-05, "loss": 0.9355, "step": 74870 }, { "epoch": 6.54, "learning_rate": 1.7298454013450956e-05, "loss": 0.9667, "step": 74880 }, { "epoch": 6.54, "learning_rate": 1.7294086819809592e-05, "loss": 0.8542, "step": 74890 }, { "epoch": 6.54, "learning_rate": 1.7289719626168225e-05, "loss": 0.9315, "step": 74900 }, { "epoch": 6.54, "learning_rate": 1.728535243252686e-05, "loss": 0.9727, "step": 74910 }, { "epoch": 6.54, "learning_rate": 1.7280985238885495e-05, "loss": 0.8092, "step": 74920 }, { "epoch": 6.54, "learning_rate": 1.727661804524413e-05, "loss": 0.7561, "step": 74930 }, { "epoch": 6.55, "learning_rate": 1.727225085160276e-05, "loss": 0.8143, "step": 74940 }, { "epoch": 6.55, "learning_rate": 1.7267883657961395e-05, "loss": 0.8074, "step": 74950 }, { "epoch": 6.55, "learning_rate": 1.7263516464320028e-05, "loss": 0.7143, "step": 74960 }, { "epoch": 6.55, "learning_rate": 1.725914927067866e-05, "loss": 0.9074, "step": 74970 }, { "epoch": 6.55, "learning_rate": 1.7254782077037294e-05, "loss": 0.8451, "step": 74980 }, { "epoch": 6.55, "learning_rate": 1.725041488339593e-05, "loss": 0.8657, "step": 74990 }, { "epoch": 6.55, "learning_rate": 1.7246047689754564e-05, "loss": 1.0064, "step": 75000 }, { "epoch": 6.55, "eval_accuracy": 0.5770855851429695, "eval_loss": 0.8845101594924927, "eval_runtime": 84.0576, "eval_samples_per_second": 121.072, "eval_steps_per_second": 15.144, "step": 75000 }, { "epoch": 6.55, "learning_rate": 1.72416804961132e-05, "loss": 0.9781, "step": 75010 }, { "epoch": 6.55, "learning_rate": 1.7237313302471834e-05, "loss": 0.8648, "step": 75020 }, { "epoch": 6.55, "learning_rate": 1.7232946108830467e-05, "loss": 0.8318, "step": 75030 }, { "epoch": 6.55, "learning_rate": 1.72285789151891e-05, "loss": 0.9237, "step": 75040 }, { "epoch": 6.56, "learning_rate": 1.7224211721547734e-05, "loss": 0.7939, "step": 75050 }, { "epoch": 6.56, "learning_rate": 1.7219844527906367e-05, "loss": 0.727, "step": 75060 }, { "epoch": 6.56, "learning_rate": 1.7215477334265e-05, "loss": 0.6993, "step": 75070 }, { "epoch": 6.56, "learning_rate": 1.7211110140623633e-05, "loss": 0.9666, "step": 75080 }, { "epoch": 6.56, "learning_rate": 1.720674294698227e-05, "loss": 0.9837, "step": 75090 }, { "epoch": 6.56, "learning_rate": 1.7202375753340903e-05, "loss": 0.8808, "step": 75100 }, { "epoch": 6.56, "learning_rate": 1.719800855969954e-05, "loss": 0.7452, "step": 75110 }, { "epoch": 6.56, "learning_rate": 1.7193641366058173e-05, "loss": 0.8887, "step": 75120 }, { "epoch": 6.56, "learning_rate": 1.7189274172416806e-05, "loss": 0.9567, "step": 75130 }, { "epoch": 6.56, "learning_rate": 1.718490697877544e-05, "loss": 1.0352, "step": 75140 }, { "epoch": 6.56, "learning_rate": 1.7180539785134072e-05, "loss": 0.8081, "step": 75150 }, { "epoch": 6.56, "learning_rate": 1.7176172591492706e-05, "loss": 0.8319, "step": 75160 }, { "epoch": 6.57, "learning_rate": 1.7171805397851342e-05, "loss": 0.9222, "step": 75170 }, { "epoch": 6.57, "learning_rate": 1.7167438204209975e-05, "loss": 0.841, "step": 75180 }, { "epoch": 6.57, "learning_rate": 1.716307101056861e-05, "loss": 0.8228, "step": 75190 }, { "epoch": 6.57, "learning_rate": 1.7158703816927242e-05, "loss": 0.9036, "step": 75200 }, { "epoch": 6.57, "learning_rate": 1.715433662328588e-05, "loss": 0.884, "step": 75210 }, { "epoch": 6.57, "learning_rate": 1.714996942964451e-05, "loss": 0.8911, "step": 75220 }, { "epoch": 6.57, "learning_rate": 1.7145602236003145e-05, "loss": 0.923, "step": 75230 }, { "epoch": 6.57, "learning_rate": 1.7141235042361778e-05, "loss": 0.9036, "step": 75240 }, { "epoch": 6.57, "learning_rate": 1.7136867848720415e-05, "loss": 0.8899, "step": 75250 }, { "epoch": 6.57, "learning_rate": 1.7132500655079048e-05, "loss": 0.8429, "step": 75260 }, { "epoch": 6.57, "learning_rate": 1.712813346143768e-05, "loss": 0.8589, "step": 75270 }, { "epoch": 6.58, "learning_rate": 1.7123766267796314e-05, "loss": 0.8876, "step": 75280 }, { "epoch": 6.58, "learning_rate": 1.7119399074154947e-05, "loss": 0.8017, "step": 75290 }, { "epoch": 6.58, "learning_rate": 1.711503188051358e-05, "loss": 0.8653, "step": 75300 }, { "epoch": 6.58, "learning_rate": 1.7110664686872217e-05, "loss": 0.757, "step": 75310 }, { "epoch": 6.58, "learning_rate": 1.710629749323085e-05, "loss": 0.8293, "step": 75320 }, { "epoch": 6.58, "learning_rate": 1.7101930299589487e-05, "loss": 0.9265, "step": 75330 }, { "epoch": 6.58, "learning_rate": 1.709756310594812e-05, "loss": 0.8046, "step": 75340 }, { "epoch": 6.58, "learning_rate": 1.7093195912306753e-05, "loss": 0.8834, "step": 75350 }, { "epoch": 6.58, "learning_rate": 1.7088828718665387e-05, "loss": 0.9684, "step": 75360 }, { "epoch": 6.58, "learning_rate": 1.708446152502402e-05, "loss": 0.8649, "step": 75370 }, { "epoch": 6.58, "learning_rate": 1.7080094331382653e-05, "loss": 0.9592, "step": 75380 }, { "epoch": 6.58, "learning_rate": 1.7075727137741286e-05, "loss": 0.9522, "step": 75390 }, { "epoch": 6.59, "learning_rate": 1.707135994409992e-05, "loss": 0.8635, "step": 75400 }, { "epoch": 6.59, "learning_rate": 1.7066992750458556e-05, "loss": 0.8664, "step": 75410 }, { "epoch": 6.59, "learning_rate": 1.7062625556817193e-05, "loss": 0.9434, "step": 75420 }, { "epoch": 6.59, "learning_rate": 1.7058258363175826e-05, "loss": 0.7498, "step": 75430 }, { "epoch": 6.59, "learning_rate": 1.705389116953446e-05, "loss": 0.8146, "step": 75440 }, { "epoch": 6.59, "learning_rate": 1.7049523975893092e-05, "loss": 0.8528, "step": 75450 }, { "epoch": 6.59, "learning_rate": 1.7045156782251725e-05, "loss": 0.9379, "step": 75460 }, { "epoch": 6.59, "learning_rate": 1.704078958861036e-05, "loss": 0.8748, "step": 75470 }, { "epoch": 6.59, "learning_rate": 1.7036422394968992e-05, "loss": 0.8635, "step": 75480 }, { "epoch": 6.59, "learning_rate": 1.7032055201327625e-05, "loss": 0.9448, "step": 75490 }, { "epoch": 6.59, "learning_rate": 1.7027688007686262e-05, "loss": 0.9928, "step": 75500 }, { "epoch": 6.6, "learning_rate": 1.7023320814044895e-05, "loss": 0.969, "step": 75510 }, { "epoch": 6.6, "learning_rate": 1.701895362040353e-05, "loss": 0.9142, "step": 75520 }, { "epoch": 6.6, "learning_rate": 1.7014586426762165e-05, "loss": 0.8332, "step": 75530 }, { "epoch": 6.6, "learning_rate": 1.7010219233120798e-05, "loss": 0.9384, "step": 75540 }, { "epoch": 6.6, "learning_rate": 1.700585203947943e-05, "loss": 0.8628, "step": 75550 }, { "epoch": 6.6, "learning_rate": 1.7001484845838064e-05, "loss": 0.8941, "step": 75560 }, { "epoch": 6.6, "learning_rate": 1.6997117652196698e-05, "loss": 0.885, "step": 75570 }, { "epoch": 6.6, "learning_rate": 1.6992750458555334e-05, "loss": 0.9717, "step": 75580 }, { "epoch": 6.6, "learning_rate": 1.6988383264913967e-05, "loss": 0.7784, "step": 75590 }, { "epoch": 6.6, "learning_rate": 1.69840160712726e-05, "loss": 0.8364, "step": 75600 }, { "epoch": 6.6, "learning_rate": 1.6979648877631234e-05, "loss": 0.8074, "step": 75610 }, { "epoch": 6.6, "learning_rate": 1.697528168398987e-05, "loss": 0.8139, "step": 75620 }, { "epoch": 6.61, "learning_rate": 1.6970914490348504e-05, "loss": 0.8506, "step": 75630 }, { "epoch": 6.61, "learning_rate": 1.6966547296707137e-05, "loss": 0.9272, "step": 75640 }, { "epoch": 6.61, "learning_rate": 1.696218010306577e-05, "loss": 0.7966, "step": 75650 }, { "epoch": 6.61, "learning_rate": 1.6957812909424407e-05, "loss": 0.9114, "step": 75660 }, { "epoch": 6.61, "learning_rate": 1.695344571578304e-05, "loss": 0.8789, "step": 75670 }, { "epoch": 6.61, "learning_rate": 1.6949078522141673e-05, "loss": 0.8463, "step": 75680 }, { "epoch": 6.61, "learning_rate": 1.6944711328500306e-05, "loss": 1.0045, "step": 75690 }, { "epoch": 6.61, "learning_rate": 1.694034413485894e-05, "loss": 0.7521, "step": 75700 }, { "epoch": 6.61, "learning_rate": 1.6935976941217573e-05, "loss": 0.7972, "step": 75710 }, { "epoch": 6.61, "learning_rate": 1.693160974757621e-05, "loss": 1.01, "step": 75720 }, { "epoch": 6.61, "learning_rate": 1.6927242553934842e-05, "loss": 0.7739, "step": 75730 }, { "epoch": 6.62, "learning_rate": 1.692287536029348e-05, "loss": 0.8629, "step": 75740 }, { "epoch": 6.62, "learning_rate": 1.6918508166652112e-05, "loss": 0.8371, "step": 75750 }, { "epoch": 6.62, "learning_rate": 1.6914140973010745e-05, "loss": 0.7063, "step": 75760 }, { "epoch": 6.62, "learning_rate": 1.690977377936938e-05, "loss": 0.9122, "step": 75770 }, { "epoch": 6.62, "learning_rate": 1.6905406585728012e-05, "loss": 0.8292, "step": 75780 }, { "epoch": 6.62, "learning_rate": 1.6901039392086645e-05, "loss": 0.8945, "step": 75790 }, { "epoch": 6.62, "learning_rate": 1.6896672198445278e-05, "loss": 0.9138, "step": 75800 }, { "epoch": 6.62, "learning_rate": 1.689230500480391e-05, "loss": 0.7619, "step": 75810 }, { "epoch": 6.62, "learning_rate": 1.6887937811162548e-05, "loss": 0.8185, "step": 75820 }, { "epoch": 6.62, "learning_rate": 1.688357061752118e-05, "loss": 0.8238, "step": 75830 }, { "epoch": 6.62, "learning_rate": 1.6879203423879818e-05, "loss": 0.9287, "step": 75840 }, { "epoch": 6.63, "learning_rate": 1.687483623023845e-05, "loss": 0.7579, "step": 75850 }, { "epoch": 6.63, "learning_rate": 1.6870469036597084e-05, "loss": 0.9239, "step": 75860 }, { "epoch": 6.63, "learning_rate": 1.6866101842955717e-05, "loss": 0.9473, "step": 75870 }, { "epoch": 6.63, "learning_rate": 1.686173464931435e-05, "loss": 0.7866, "step": 75880 }, { "epoch": 6.63, "learning_rate": 1.6857367455672984e-05, "loss": 0.9722, "step": 75890 }, { "epoch": 6.63, "learning_rate": 1.6853000262031617e-05, "loss": 0.836, "step": 75900 }, { "epoch": 6.63, "learning_rate": 1.6848633068390254e-05, "loss": 0.9567, "step": 75910 }, { "epoch": 6.63, "learning_rate": 1.6844265874748887e-05, "loss": 0.8746, "step": 75920 }, { "epoch": 6.63, "learning_rate": 1.683989868110752e-05, "loss": 0.7756, "step": 75930 }, { "epoch": 6.63, "learning_rate": 1.6835531487466157e-05, "loss": 0.8564, "step": 75940 }, { "epoch": 6.63, "learning_rate": 1.683116429382479e-05, "loss": 0.8287, "step": 75950 }, { "epoch": 6.63, "learning_rate": 1.6826797100183423e-05, "loss": 0.8719, "step": 75960 }, { "epoch": 6.64, "learning_rate": 1.6822429906542056e-05, "loss": 0.9039, "step": 75970 }, { "epoch": 6.64, "learning_rate": 1.681806271290069e-05, "loss": 0.9598, "step": 75980 }, { "epoch": 6.64, "learning_rate": 1.6813695519259326e-05, "loss": 0.9414, "step": 75990 }, { "epoch": 6.64, "learning_rate": 1.680932832561796e-05, "loss": 0.8832, "step": 76000 }, { "epoch": 6.64, "learning_rate": 1.6804961131976592e-05, "loss": 0.7672, "step": 76010 }, { "epoch": 6.64, "learning_rate": 1.6800593938335226e-05, "loss": 0.9563, "step": 76020 }, { "epoch": 6.64, "learning_rate": 1.679622674469386e-05, "loss": 0.8623, "step": 76030 }, { "epoch": 6.64, "learning_rate": 1.6791859551052495e-05, "loss": 0.774, "step": 76040 }, { "epoch": 6.64, "learning_rate": 1.678749235741113e-05, "loss": 1.0845, "step": 76050 }, { "epoch": 6.64, "learning_rate": 1.6783125163769762e-05, "loss": 0.9118, "step": 76060 }, { "epoch": 6.64, "learning_rate": 1.67787579701284e-05, "loss": 0.837, "step": 76070 }, { "epoch": 6.65, "learning_rate": 1.677439077648703e-05, "loss": 1.0122, "step": 76080 }, { "epoch": 6.65, "learning_rate": 1.6770023582845665e-05, "loss": 0.8891, "step": 76090 }, { "epoch": 6.65, "learning_rate": 1.6765656389204298e-05, "loss": 1.0186, "step": 76100 }, { "epoch": 6.65, "learning_rate": 1.676128919556293e-05, "loss": 0.9533, "step": 76110 }, { "epoch": 6.65, "learning_rate": 1.6756922001921564e-05, "loss": 0.8851, "step": 76120 }, { "epoch": 6.65, "learning_rate": 1.6752554808280198e-05, "loss": 0.9636, "step": 76130 }, { "epoch": 6.65, "learning_rate": 1.6748187614638834e-05, "loss": 0.8612, "step": 76140 }, { "epoch": 6.65, "learning_rate": 1.674382042099747e-05, "loss": 0.7559, "step": 76150 }, { "epoch": 6.65, "learning_rate": 1.6739453227356104e-05, "loss": 0.8695, "step": 76160 }, { "epoch": 6.65, "learning_rate": 1.6735086033714737e-05, "loss": 0.8787, "step": 76170 }, { "epoch": 6.65, "learning_rate": 1.673071884007337e-05, "loss": 0.8614, "step": 76180 }, { "epoch": 6.65, "learning_rate": 1.6726351646432004e-05, "loss": 0.9151, "step": 76190 }, { "epoch": 6.66, "learning_rate": 1.6721984452790637e-05, "loss": 0.8078, "step": 76200 }, { "epoch": 6.66, "learning_rate": 1.671761725914927e-05, "loss": 0.7337, "step": 76210 }, { "epoch": 6.66, "learning_rate": 1.6713250065507903e-05, "loss": 0.858, "step": 76220 }, { "epoch": 6.66, "learning_rate": 1.6708882871866536e-05, "loss": 0.768, "step": 76230 }, { "epoch": 6.66, "learning_rate": 1.6704515678225173e-05, "loss": 0.8781, "step": 76240 }, { "epoch": 6.66, "learning_rate": 1.670014848458381e-05, "loss": 0.9242, "step": 76250 }, { "epoch": 6.66, "learning_rate": 1.6695781290942443e-05, "loss": 0.8321, "step": 76260 }, { "epoch": 6.66, "learning_rate": 1.6691414097301076e-05, "loss": 0.8809, "step": 76270 }, { "epoch": 6.66, "learning_rate": 1.668704690365971e-05, "loss": 0.8277, "step": 76280 }, { "epoch": 6.66, "learning_rate": 1.6682679710018342e-05, "loss": 0.8065, "step": 76290 }, { "epoch": 6.66, "learning_rate": 1.6678312516376976e-05, "loss": 0.8793, "step": 76300 }, { "epoch": 6.67, "learning_rate": 1.667394532273561e-05, "loss": 0.9148, "step": 76310 }, { "epoch": 6.67, "learning_rate": 1.6669578129094245e-05, "loss": 0.7659, "step": 76320 }, { "epoch": 6.67, "learning_rate": 1.666521093545288e-05, "loss": 0.9406, "step": 76330 }, { "epoch": 6.67, "learning_rate": 1.6660843741811512e-05, "loss": 0.8283, "step": 76340 }, { "epoch": 6.67, "learning_rate": 1.665647654817015e-05, "loss": 0.8588, "step": 76350 }, { "epoch": 6.67, "learning_rate": 1.665210935452878e-05, "loss": 1.0028, "step": 76360 }, { "epoch": 6.67, "learning_rate": 1.6647742160887415e-05, "loss": 1.024, "step": 76370 }, { "epoch": 6.67, "learning_rate": 1.6643374967246048e-05, "loss": 0.845, "step": 76380 }, { "epoch": 6.67, "learning_rate": 1.663900777360468e-05, "loss": 0.9327, "step": 76390 }, { "epoch": 6.67, "learning_rate": 1.6634640579963318e-05, "loss": 0.9449, "step": 76400 }, { "epoch": 6.67, "learning_rate": 1.663027338632195e-05, "loss": 0.8201, "step": 76410 }, { "epoch": 6.67, "learning_rate": 1.6625906192680584e-05, "loss": 1.0396, "step": 76420 }, { "epoch": 6.68, "learning_rate": 1.6621538999039217e-05, "loss": 0.8331, "step": 76430 }, { "epoch": 6.68, "learning_rate": 1.661717180539785e-05, "loss": 0.8801, "step": 76440 }, { "epoch": 6.68, "learning_rate": 1.6612804611756487e-05, "loss": 0.8421, "step": 76450 }, { "epoch": 6.68, "learning_rate": 1.660843741811512e-05, "loss": 0.8826, "step": 76460 }, { "epoch": 6.68, "learning_rate": 1.6604070224473754e-05, "loss": 0.7094, "step": 76470 }, { "epoch": 6.68, "learning_rate": 1.659970303083239e-05, "loss": 0.7809, "step": 76480 }, { "epoch": 6.68, "learning_rate": 1.6595335837191023e-05, "loss": 1.005, "step": 76490 }, { "epoch": 6.68, "learning_rate": 1.6590968643549657e-05, "loss": 0.7905, "step": 76500 }, { "epoch": 6.68, "learning_rate": 1.658660144990829e-05, "loss": 0.9088, "step": 76510 }, { "epoch": 6.68, "learning_rate": 1.6582234256266923e-05, "loss": 0.8341, "step": 76520 }, { "epoch": 6.68, "learning_rate": 1.6577867062625556e-05, "loss": 1.0027, "step": 76530 }, { "epoch": 6.69, "learning_rate": 1.657349986898419e-05, "loss": 0.7769, "step": 76540 }, { "epoch": 6.69, "learning_rate": 1.6569132675342826e-05, "loss": 0.84, "step": 76550 }, { "epoch": 6.69, "learning_rate": 1.656476548170146e-05, "loss": 0.9277, "step": 76560 }, { "epoch": 6.69, "learning_rate": 1.6560398288060096e-05, "loss": 0.8332, "step": 76570 }, { "epoch": 6.69, "learning_rate": 1.655603109441873e-05, "loss": 0.9498, "step": 76580 }, { "epoch": 6.69, "learning_rate": 1.6551663900777362e-05, "loss": 0.835, "step": 76590 }, { "epoch": 6.69, "learning_rate": 1.6547296707135995e-05, "loss": 0.8002, "step": 76600 }, { "epoch": 6.69, "learning_rate": 1.654292951349463e-05, "loss": 0.863, "step": 76610 }, { "epoch": 6.69, "learning_rate": 1.6538562319853262e-05, "loss": 0.9139, "step": 76620 }, { "epoch": 6.69, "learning_rate": 1.6534195126211895e-05, "loss": 0.9216, "step": 76630 }, { "epoch": 6.69, "learning_rate": 1.6529827932570528e-05, "loss": 0.7914, "step": 76640 }, { "epoch": 6.69, "learning_rate": 1.6525460738929165e-05, "loss": 0.8261, "step": 76650 }, { "epoch": 6.7, "learning_rate": 1.6521093545287798e-05, "loss": 0.9758, "step": 76660 }, { "epoch": 6.7, "learning_rate": 1.6516726351646435e-05, "loss": 0.9133, "step": 76670 }, { "epoch": 6.7, "learning_rate": 1.6512359158005068e-05, "loss": 0.715, "step": 76680 }, { "epoch": 6.7, "learning_rate": 1.65079919643637e-05, "loss": 0.8892, "step": 76690 }, { "epoch": 6.7, "learning_rate": 1.6503624770722334e-05, "loss": 0.8664, "step": 76700 }, { "epoch": 6.7, "learning_rate": 1.6499257577080967e-05, "loss": 0.7781, "step": 76710 }, { "epoch": 6.7, "learning_rate": 1.64948903834396e-05, "loss": 0.976, "step": 76720 }, { "epoch": 6.7, "learning_rate": 1.6490523189798237e-05, "loss": 0.9319, "step": 76730 }, { "epoch": 6.7, "learning_rate": 1.648615599615687e-05, "loss": 0.9036, "step": 76740 }, { "epoch": 6.7, "learning_rate": 1.6481788802515504e-05, "loss": 0.7841, "step": 76750 }, { "epoch": 6.7, "learning_rate": 1.6477421608874137e-05, "loss": 0.8826, "step": 76760 }, { "epoch": 6.71, "learning_rate": 1.6473054415232773e-05, "loss": 0.8993, "step": 76770 }, { "epoch": 6.71, "learning_rate": 1.6468687221591407e-05, "loss": 0.8224, "step": 76780 }, { "epoch": 6.71, "learning_rate": 1.646432002795004e-05, "loss": 0.8605, "step": 76790 }, { "epoch": 6.71, "learning_rate": 1.6459952834308673e-05, "loss": 0.811, "step": 76800 }, { "epoch": 6.71, "learning_rate": 1.645558564066731e-05, "loss": 0.7641, "step": 76810 }, { "epoch": 6.71, "learning_rate": 1.6451218447025943e-05, "loss": 1.0577, "step": 76820 }, { "epoch": 6.71, "learning_rate": 1.6446851253384576e-05, "loss": 0.9121, "step": 76830 }, { "epoch": 6.71, "learning_rate": 1.644248405974321e-05, "loss": 0.9242, "step": 76840 }, { "epoch": 6.71, "learning_rate": 1.6438116866101842e-05, "loss": 0.9326, "step": 76850 }, { "epoch": 6.71, "learning_rate": 1.6433749672460476e-05, "loss": 0.8105, "step": 76860 }, { "epoch": 6.71, "learning_rate": 1.6429382478819112e-05, "loss": 0.8841, "step": 76870 }, { "epoch": 6.71, "learning_rate": 1.6425015285177745e-05, "loss": 0.8538, "step": 76880 }, { "epoch": 6.72, "learning_rate": 1.6420648091536382e-05, "loss": 0.8407, "step": 76890 }, { "epoch": 6.72, "learning_rate": 1.6416280897895015e-05, "loss": 0.8533, "step": 76900 }, { "epoch": 6.72, "learning_rate": 1.641191370425365e-05, "loss": 0.8698, "step": 76910 }, { "epoch": 6.72, "learning_rate": 1.640754651061228e-05, "loss": 0.7595, "step": 76920 }, { "epoch": 6.72, "learning_rate": 1.6403179316970915e-05, "loss": 0.8506, "step": 76930 }, { "epoch": 6.72, "learning_rate": 1.6398812123329548e-05, "loss": 0.9469, "step": 76940 }, { "epoch": 6.72, "learning_rate": 1.639444492968818e-05, "loss": 0.9796, "step": 76950 }, { "epoch": 6.72, "learning_rate": 1.6390077736046818e-05, "loss": 0.8364, "step": 76960 }, { "epoch": 6.72, "learning_rate": 1.638571054240545e-05, "loss": 0.9406, "step": 76970 }, { "epoch": 6.72, "learning_rate": 1.6381343348764088e-05, "loss": 0.7607, "step": 76980 }, { "epoch": 6.72, "learning_rate": 1.637697615512272e-05, "loss": 0.7736, "step": 76990 }, { "epoch": 6.73, "learning_rate": 1.6372608961481354e-05, "loss": 1.0139, "step": 77000 }, { "epoch": 6.73, "learning_rate": 1.6368241767839987e-05, "loss": 0.7458, "step": 77010 }, { "epoch": 6.73, "learning_rate": 1.636387457419862e-05, "loss": 0.9584, "step": 77020 }, { "epoch": 6.73, "learning_rate": 1.6359507380557254e-05, "loss": 0.8709, "step": 77030 }, { "epoch": 6.73, "learning_rate": 1.6355140186915887e-05, "loss": 0.8792, "step": 77040 }, { "epoch": 6.73, "learning_rate": 1.635077299327452e-05, "loss": 0.9605, "step": 77050 }, { "epoch": 6.73, "learning_rate": 1.6346405799633157e-05, "loss": 0.8141, "step": 77060 }, { "epoch": 6.73, "learning_rate": 1.634203860599179e-05, "loss": 0.8496, "step": 77070 }, { "epoch": 6.73, "learning_rate": 1.6337671412350426e-05, "loss": 0.8523, "step": 77080 }, { "epoch": 6.73, "learning_rate": 1.633330421870906e-05, "loss": 0.994, "step": 77090 }, { "epoch": 6.73, "learning_rate": 1.6328937025067693e-05, "loss": 0.8766, "step": 77100 }, { "epoch": 6.74, "learning_rate": 1.6324569831426326e-05, "loss": 0.8286, "step": 77110 }, { "epoch": 6.74, "learning_rate": 1.632020263778496e-05, "loss": 0.9537, "step": 77120 }, { "epoch": 6.74, "learning_rate": 1.6315835444143592e-05, "loss": 0.9352, "step": 77130 }, { "epoch": 6.74, "learning_rate": 1.631146825050223e-05, "loss": 0.7755, "step": 77140 }, { "epoch": 6.74, "learning_rate": 1.6307101056860862e-05, "loss": 0.9414, "step": 77150 }, { "epoch": 6.74, "learning_rate": 1.6302733863219495e-05, "loss": 0.8202, "step": 77160 }, { "epoch": 6.74, "learning_rate": 1.629836666957813e-05, "loss": 0.8787, "step": 77170 }, { "epoch": 6.74, "learning_rate": 1.6293999475936765e-05, "loss": 0.8157, "step": 77180 }, { "epoch": 6.74, "learning_rate": 1.62896322822954e-05, "loss": 0.9056, "step": 77190 }, { "epoch": 6.74, "learning_rate": 1.628526508865403e-05, "loss": 0.9012, "step": 77200 }, { "epoch": 6.74, "learning_rate": 1.6280897895012665e-05, "loss": 0.8825, "step": 77210 }, { "epoch": 6.74, "learning_rate": 1.62765307013713e-05, "loss": 0.9194, "step": 77220 }, { "epoch": 6.75, "learning_rate": 1.6272163507729935e-05, "loss": 0.9493, "step": 77230 }, { "epoch": 6.75, "learning_rate": 1.6267796314088568e-05, "loss": 0.8622, "step": 77240 }, { "epoch": 6.75, "learning_rate": 1.62634291204472e-05, "loss": 0.9756, "step": 77250 }, { "epoch": 6.75, "learning_rate": 1.6259061926805834e-05, "loss": 0.7526, "step": 77260 }, { "epoch": 6.75, "learning_rate": 1.6254694733164467e-05, "loss": 0.8638, "step": 77270 }, { "epoch": 6.75, "learning_rate": 1.6250327539523104e-05, "loss": 1.0546, "step": 77280 }, { "epoch": 6.75, "learning_rate": 1.6245960345881737e-05, "loss": 0.8905, "step": 77290 }, { "epoch": 6.75, "learning_rate": 1.6241593152240374e-05, "loss": 0.9293, "step": 77300 }, { "epoch": 6.75, "learning_rate": 1.6237225958599007e-05, "loss": 0.8342, "step": 77310 }, { "epoch": 6.75, "learning_rate": 1.623285876495764e-05, "loss": 0.8549, "step": 77320 }, { "epoch": 6.75, "learning_rate": 1.6228491571316273e-05, "loss": 0.9766, "step": 77330 }, { "epoch": 6.76, "learning_rate": 1.6224124377674907e-05, "loss": 0.9373, "step": 77340 }, { "epoch": 6.76, "learning_rate": 1.621975718403354e-05, "loss": 0.7871, "step": 77350 }, { "epoch": 6.76, "learning_rate": 1.6215389990392173e-05, "loss": 0.8363, "step": 77360 }, { "epoch": 6.76, "learning_rate": 1.6211022796750806e-05, "loss": 0.9412, "step": 77370 }, { "epoch": 6.76, "learning_rate": 1.6206655603109443e-05, "loss": 0.975, "step": 77380 }, { "epoch": 6.76, "learning_rate": 1.620228840946808e-05, "loss": 0.841, "step": 77390 }, { "epoch": 6.76, "learning_rate": 1.6197921215826713e-05, "loss": 0.8368, "step": 77400 }, { "epoch": 6.76, "learning_rate": 1.6193554022185346e-05, "loss": 0.8181, "step": 77410 }, { "epoch": 6.76, "learning_rate": 1.618918682854398e-05, "loss": 0.8507, "step": 77420 }, { "epoch": 6.76, "learning_rate": 1.6184819634902612e-05, "loss": 0.8861, "step": 77430 }, { "epoch": 6.76, "learning_rate": 1.6180452441261245e-05, "loss": 0.9293, "step": 77440 }, { "epoch": 6.76, "learning_rate": 1.617608524761988e-05, "loss": 0.9697, "step": 77450 }, { "epoch": 6.77, "learning_rate": 1.6171718053978512e-05, "loss": 0.7748, "step": 77460 }, { "epoch": 6.77, "learning_rate": 1.616735086033715e-05, "loss": 0.8433, "step": 77470 }, { "epoch": 6.77, "learning_rate": 1.616298366669578e-05, "loss": 0.8655, "step": 77480 }, { "epoch": 6.77, "learning_rate": 1.6158616473054418e-05, "loss": 0.9224, "step": 77490 }, { "epoch": 6.77, "learning_rate": 1.615424927941305e-05, "loss": 0.9245, "step": 77500 }, { "epoch": 6.77, "learning_rate": 1.6149882085771685e-05, "loss": 0.8791, "step": 77510 }, { "epoch": 6.77, "learning_rate": 1.6145514892130318e-05, "loss": 0.7336, "step": 77520 }, { "epoch": 6.77, "learning_rate": 1.614114769848895e-05, "loss": 0.8737, "step": 77530 }, { "epoch": 6.77, "learning_rate": 1.6136780504847584e-05, "loss": 0.9127, "step": 77540 }, { "epoch": 6.77, "learning_rate": 1.613241331120622e-05, "loss": 0.8247, "step": 77550 }, { "epoch": 6.77, "learning_rate": 1.6128046117564854e-05, "loss": 0.944, "step": 77560 }, { "epoch": 6.78, "learning_rate": 1.6123678923923487e-05, "loss": 0.7969, "step": 77570 }, { "epoch": 6.78, "learning_rate": 1.611931173028212e-05, "loss": 0.8106, "step": 77580 }, { "epoch": 6.78, "learning_rate": 1.6114944536640757e-05, "loss": 0.9757, "step": 77590 }, { "epoch": 6.78, "learning_rate": 1.611057734299939e-05, "loss": 0.88, "step": 77600 }, { "epoch": 6.78, "learning_rate": 1.6106210149358023e-05, "loss": 0.7905, "step": 77610 }, { "epoch": 6.78, "learning_rate": 1.6101842955716657e-05, "loss": 0.8008, "step": 77620 }, { "epoch": 6.78, "learning_rate": 1.6097475762075293e-05, "loss": 0.8461, "step": 77630 }, { "epoch": 6.78, "learning_rate": 1.6093108568433926e-05, "loss": 0.9588, "step": 77640 }, { "epoch": 6.78, "learning_rate": 1.608874137479256e-05, "loss": 0.8169, "step": 77650 }, { "epoch": 6.78, "learning_rate": 1.6084374181151193e-05, "loss": 0.8434, "step": 77660 }, { "epoch": 6.78, "learning_rate": 1.6080006987509826e-05, "loss": 0.9913, "step": 77670 }, { "epoch": 6.78, "learning_rate": 1.607563979386846e-05, "loss": 0.9204, "step": 77680 }, { "epoch": 6.79, "learning_rate": 1.6071272600227096e-05, "loss": 0.9142, "step": 77690 }, { "epoch": 6.79, "learning_rate": 1.606690540658573e-05, "loss": 0.8519, "step": 77700 }, { "epoch": 6.79, "learning_rate": 1.6062538212944362e-05, "loss": 0.8354, "step": 77710 }, { "epoch": 6.79, "learning_rate": 1.6058171019303e-05, "loss": 0.8135, "step": 77720 }, { "epoch": 6.79, "learning_rate": 1.6053803825661632e-05, "loss": 0.8186, "step": 77730 }, { "epoch": 6.79, "learning_rate": 1.6049436632020265e-05, "loss": 0.8896, "step": 77740 }, { "epoch": 6.79, "learning_rate": 1.60450694383789e-05, "loss": 0.8983, "step": 77750 }, { "epoch": 6.79, "learning_rate": 1.604070224473753e-05, "loss": 0.9068, "step": 77760 }, { "epoch": 6.79, "learning_rate": 1.6036335051096165e-05, "loss": 0.9392, "step": 77770 }, { "epoch": 6.79, "learning_rate": 1.6031967857454798e-05, "loss": 0.936, "step": 77780 }, { "epoch": 6.79, "learning_rate": 1.6027600663813435e-05, "loss": 0.9362, "step": 77790 }, { "epoch": 6.8, "learning_rate": 1.6023233470172068e-05, "loss": 0.8902, "step": 77800 }, { "epoch": 6.8, "learning_rate": 1.6018866276530704e-05, "loss": 0.7908, "step": 77810 }, { "epoch": 6.8, "learning_rate": 1.6014499082889338e-05, "loss": 0.8501, "step": 77820 }, { "epoch": 6.8, "learning_rate": 1.601013188924797e-05, "loss": 0.8289, "step": 77830 }, { "epoch": 6.8, "learning_rate": 1.6005764695606604e-05, "loss": 0.9957, "step": 77840 }, { "epoch": 6.8, "learning_rate": 1.6001397501965237e-05, "loss": 0.7714, "step": 77850 }, { "epoch": 6.8, "learning_rate": 1.599703030832387e-05, "loss": 0.8413, "step": 77860 }, { "epoch": 6.8, "learning_rate": 1.5992663114682504e-05, "loss": 0.8584, "step": 77870 }, { "epoch": 6.8, "learning_rate": 1.598829592104114e-05, "loss": 0.8934, "step": 77880 }, { "epoch": 6.8, "learning_rate": 1.5983928727399773e-05, "loss": 0.8827, "step": 77890 }, { "epoch": 6.8, "learning_rate": 1.5979561533758407e-05, "loss": 0.8585, "step": 77900 }, { "epoch": 6.8, "learning_rate": 1.5975194340117043e-05, "loss": 0.8944, "step": 77910 }, { "epoch": 6.81, "learning_rate": 1.5970827146475676e-05, "loss": 0.8895, "step": 77920 }, { "epoch": 6.81, "learning_rate": 1.596645995283431e-05, "loss": 0.8799, "step": 77930 }, { "epoch": 6.81, "learning_rate": 1.5962092759192943e-05, "loss": 0.7717, "step": 77940 }, { "epoch": 6.81, "learning_rate": 1.5957725565551576e-05, "loss": 0.8065, "step": 77950 }, { "epoch": 6.81, "learning_rate": 1.5953358371910213e-05, "loss": 0.958, "step": 77960 }, { "epoch": 6.81, "learning_rate": 1.5948991178268846e-05, "loss": 0.9178, "step": 77970 }, { "epoch": 6.81, "learning_rate": 1.594462398462748e-05, "loss": 0.7489, "step": 77980 }, { "epoch": 6.81, "learning_rate": 1.5940256790986112e-05, "loss": 0.8491, "step": 77990 }, { "epoch": 6.81, "learning_rate": 1.5935889597344745e-05, "loss": 0.8548, "step": 78000 }, { "epoch": 6.81, "learning_rate": 1.5931522403703382e-05, "loss": 0.9884, "step": 78010 }, { "epoch": 6.81, "learning_rate": 1.5927155210062015e-05, "loss": 1.0, "step": 78020 }, { "epoch": 6.82, "learning_rate": 1.592278801642065e-05, "loss": 0.9053, "step": 78030 }, { "epoch": 6.82, "learning_rate": 1.5918420822779285e-05, "loss": 0.8224, "step": 78040 }, { "epoch": 6.82, "learning_rate": 1.5914053629137918e-05, "loss": 1.0008, "step": 78050 }, { "epoch": 6.82, "learning_rate": 1.590968643549655e-05, "loss": 0.8547, "step": 78060 }, { "epoch": 6.82, "learning_rate": 1.5905319241855185e-05, "loss": 0.8837, "step": 78070 }, { "epoch": 6.82, "learning_rate": 1.5900952048213818e-05, "loss": 0.7127, "step": 78080 }, { "epoch": 6.82, "learning_rate": 1.589658485457245e-05, "loss": 0.9232, "step": 78090 }, { "epoch": 6.82, "learning_rate": 1.5892217660931084e-05, "loss": 0.8936, "step": 78100 }, { "epoch": 6.82, "learning_rate": 1.588785046728972e-05, "loss": 0.8396, "step": 78110 }, { "epoch": 6.82, "learning_rate": 1.5883483273648354e-05, "loss": 0.9951, "step": 78120 }, { "epoch": 6.82, "learning_rate": 1.587911608000699e-05, "loss": 0.9198, "step": 78130 }, { "epoch": 6.83, "learning_rate": 1.5874748886365624e-05, "loss": 0.9665, "step": 78140 }, { "epoch": 6.83, "learning_rate": 1.5870381692724257e-05, "loss": 0.8447, "step": 78150 }, { "epoch": 6.83, "learning_rate": 1.586601449908289e-05, "loss": 0.7661, "step": 78160 }, { "epoch": 6.83, "learning_rate": 1.5861647305441523e-05, "loss": 0.7298, "step": 78170 }, { "epoch": 6.83, "learning_rate": 1.5857280111800157e-05, "loss": 0.8594, "step": 78180 }, { "epoch": 6.83, "learning_rate": 1.585291291815879e-05, "loss": 0.8349, "step": 78190 }, { "epoch": 6.83, "learning_rate": 1.5848545724517423e-05, "loss": 0.7896, "step": 78200 }, { "epoch": 6.83, "learning_rate": 1.584417853087606e-05, "loss": 0.8375, "step": 78210 }, { "epoch": 6.83, "learning_rate": 1.5839811337234696e-05, "loss": 0.9816, "step": 78220 }, { "epoch": 6.83, "learning_rate": 1.583544414359333e-05, "loss": 0.8492, "step": 78230 }, { "epoch": 6.83, "learning_rate": 1.5831076949951963e-05, "loss": 0.9546, "step": 78240 }, { "epoch": 6.83, "learning_rate": 1.5826709756310596e-05, "loss": 0.8582, "step": 78250 }, { "epoch": 6.84, "learning_rate": 1.582234256266923e-05, "loss": 0.8366, "step": 78260 }, { "epoch": 6.84, "learning_rate": 1.5817975369027862e-05, "loss": 0.8822, "step": 78270 }, { "epoch": 6.84, "learning_rate": 1.5813608175386495e-05, "loss": 0.8576, "step": 78280 }, { "epoch": 6.84, "learning_rate": 1.5809240981745132e-05, "loss": 0.8956, "step": 78290 }, { "epoch": 6.84, "learning_rate": 1.5804873788103765e-05, "loss": 0.907, "step": 78300 }, { "epoch": 6.84, "learning_rate": 1.58005065944624e-05, "loss": 1.0101, "step": 78310 }, { "epoch": 6.84, "learning_rate": 1.5796139400821035e-05, "loss": 0.8289, "step": 78320 }, { "epoch": 6.84, "learning_rate": 1.5791772207179668e-05, "loss": 0.8921, "step": 78330 }, { "epoch": 6.84, "learning_rate": 1.57874050135383e-05, "loss": 0.8459, "step": 78340 }, { "epoch": 6.84, "learning_rate": 1.5783037819896935e-05, "loss": 0.7757, "step": 78350 }, { "epoch": 6.84, "learning_rate": 1.5778670626255568e-05, "loss": 0.8391, "step": 78360 }, { "epoch": 6.85, "learning_rate": 1.5774303432614204e-05, "loss": 0.8627, "step": 78370 }, { "epoch": 6.85, "learning_rate": 1.5769936238972838e-05, "loss": 0.8491, "step": 78380 }, { "epoch": 6.85, "learning_rate": 1.576556904533147e-05, "loss": 0.853, "step": 78390 }, { "epoch": 6.85, "learning_rate": 1.5761201851690104e-05, "loss": 0.8905, "step": 78400 }, { "epoch": 6.85, "learning_rate": 1.5756834658048737e-05, "loss": 0.8435, "step": 78410 }, { "epoch": 6.85, "learning_rate": 1.5752467464407374e-05, "loss": 0.8728, "step": 78420 }, { "epoch": 6.85, "learning_rate": 1.5748100270766007e-05, "loss": 0.888, "step": 78430 }, { "epoch": 6.85, "learning_rate": 1.574373307712464e-05, "loss": 0.8968, "step": 78440 }, { "epoch": 6.85, "learning_rate": 1.5739365883483277e-05, "loss": 0.9931, "step": 78450 }, { "epoch": 6.85, "learning_rate": 1.573499868984191e-05, "loss": 0.8155, "step": 78460 }, { "epoch": 6.85, "learning_rate": 1.5730631496200543e-05, "loss": 0.811, "step": 78470 }, { "epoch": 6.85, "learning_rate": 1.5726264302559176e-05, "loss": 0.9417, "step": 78480 }, { "epoch": 6.86, "learning_rate": 1.572189710891781e-05, "loss": 0.881, "step": 78490 }, { "epoch": 6.86, "learning_rate": 1.5717529915276443e-05, "loss": 0.8864, "step": 78500 }, { "epoch": 6.86, "learning_rate": 1.5713162721635076e-05, "loss": 0.7934, "step": 78510 }, { "epoch": 6.86, "learning_rate": 1.5708795527993713e-05, "loss": 0.9958, "step": 78520 }, { "epoch": 6.86, "learning_rate": 1.5704428334352346e-05, "loss": 0.8984, "step": 78530 }, { "epoch": 6.86, "learning_rate": 1.5700061140710982e-05, "loss": 0.7929, "step": 78540 }, { "epoch": 6.86, "learning_rate": 1.5695693947069616e-05, "loss": 0.8464, "step": 78550 }, { "epoch": 6.86, "learning_rate": 1.569132675342825e-05, "loss": 0.8228, "step": 78560 }, { "epoch": 6.86, "learning_rate": 1.5686959559786882e-05, "loss": 0.8076, "step": 78570 }, { "epoch": 6.86, "learning_rate": 1.5682592366145515e-05, "loss": 0.6978, "step": 78580 }, { "epoch": 6.86, "learning_rate": 1.567822517250415e-05, "loss": 0.862, "step": 78590 }, { "epoch": 6.87, "learning_rate": 1.567385797886278e-05, "loss": 0.9116, "step": 78600 }, { "epoch": 6.87, "learning_rate": 1.5669490785221415e-05, "loss": 0.9003, "step": 78610 }, { "epoch": 6.87, "learning_rate": 1.566512359158005e-05, "loss": 0.8692, "step": 78620 }, { "epoch": 6.87, "learning_rate": 1.5660756397938685e-05, "loss": 0.8879, "step": 78630 }, { "epoch": 6.87, "learning_rate": 1.565638920429732e-05, "loss": 0.7908, "step": 78640 }, { "epoch": 6.87, "learning_rate": 1.5652022010655954e-05, "loss": 0.8063, "step": 78650 }, { "epoch": 6.87, "learning_rate": 1.5647654817014588e-05, "loss": 0.92, "step": 78660 }, { "epoch": 6.87, "learning_rate": 1.564328762337322e-05, "loss": 0.8517, "step": 78670 }, { "epoch": 6.87, "learning_rate": 1.5638920429731854e-05, "loss": 0.9713, "step": 78680 }, { "epoch": 6.87, "learning_rate": 1.5634553236090487e-05, "loss": 0.8582, "step": 78690 }, { "epoch": 6.87, "learning_rate": 1.5630186042449124e-05, "loss": 0.9535, "step": 78700 }, { "epoch": 6.87, "learning_rate": 1.5625818848807757e-05, "loss": 0.9042, "step": 78710 }, { "epoch": 6.88, "learning_rate": 1.562145165516639e-05, "loss": 0.8615, "step": 78720 }, { "epoch": 6.88, "learning_rate": 1.5617084461525023e-05, "loss": 0.8641, "step": 78730 }, { "epoch": 6.88, "learning_rate": 1.561271726788366e-05, "loss": 0.8092, "step": 78740 }, { "epoch": 6.88, "learning_rate": 1.5608350074242293e-05, "loss": 0.8646, "step": 78750 }, { "epoch": 6.88, "learning_rate": 1.5603982880600926e-05, "loss": 0.8234, "step": 78760 }, { "epoch": 6.88, "learning_rate": 1.559961568695956e-05, "loss": 0.8982, "step": 78770 }, { "epoch": 6.88, "learning_rate": 1.5595248493318196e-05, "loss": 0.7946, "step": 78780 }, { "epoch": 6.88, "learning_rate": 1.559088129967683e-05, "loss": 0.8568, "step": 78790 }, { "epoch": 6.88, "learning_rate": 1.5586514106035463e-05, "loss": 0.8793, "step": 78800 }, { "epoch": 6.88, "learning_rate": 1.5582146912394096e-05, "loss": 0.9098, "step": 78810 }, { "epoch": 6.88, "learning_rate": 1.557777971875273e-05, "loss": 0.8639, "step": 78820 }, { "epoch": 6.89, "learning_rate": 1.5573412525111362e-05, "loss": 0.8498, "step": 78830 }, { "epoch": 6.89, "learning_rate": 1.556904533147e-05, "loss": 0.8368, "step": 78840 }, { "epoch": 6.89, "learning_rate": 1.5564678137828632e-05, "loss": 0.8107, "step": 78850 }, { "epoch": 6.89, "learning_rate": 1.5560310944187265e-05, "loss": 0.8347, "step": 78860 }, { "epoch": 6.89, "learning_rate": 1.5555943750545902e-05, "loss": 0.8483, "step": 78870 }, { "epoch": 6.89, "learning_rate": 1.5551576556904535e-05, "loss": 0.7327, "step": 78880 }, { "epoch": 6.89, "learning_rate": 1.5547209363263168e-05, "loss": 0.8598, "step": 78890 }, { "epoch": 6.89, "learning_rate": 1.55428421696218e-05, "loss": 0.8461, "step": 78900 }, { "epoch": 6.89, "learning_rate": 1.5538474975980435e-05, "loss": 0.9098, "step": 78910 }, { "epoch": 6.89, "learning_rate": 1.5534107782339068e-05, "loss": 0.8926, "step": 78920 }, { "epoch": 6.89, "learning_rate": 1.55297405886977e-05, "loss": 0.9741, "step": 78930 }, { "epoch": 6.89, "learning_rate": 1.5525373395056338e-05, "loss": 0.7957, "step": 78940 }, { "epoch": 6.9, "learning_rate": 1.5521006201414974e-05, "loss": 0.9209, "step": 78950 }, { "epoch": 6.9, "learning_rate": 1.5516639007773607e-05, "loss": 0.9447, "step": 78960 }, { "epoch": 6.9, "learning_rate": 1.551227181413224e-05, "loss": 0.8736, "step": 78970 }, { "epoch": 6.9, "learning_rate": 1.5507904620490874e-05, "loss": 0.8465, "step": 78980 }, { "epoch": 6.9, "learning_rate": 1.5503537426849507e-05, "loss": 1.0109, "step": 78990 }, { "epoch": 6.9, "learning_rate": 1.549917023320814e-05, "loss": 0.6752, "step": 79000 }, { "epoch": 6.9, "learning_rate": 1.5494803039566773e-05, "loss": 0.9033, "step": 79010 }, { "epoch": 6.9, "learning_rate": 1.5490435845925407e-05, "loss": 0.8346, "step": 79020 }, { "epoch": 6.9, "learning_rate": 1.5486068652284043e-05, "loss": 1.0978, "step": 79030 }, { "epoch": 6.9, "learning_rate": 1.5481701458642676e-05, "loss": 0.9137, "step": 79040 }, { "epoch": 6.9, "learning_rate": 1.5477334265001313e-05, "loss": 0.8679, "step": 79050 }, { "epoch": 6.91, "learning_rate": 1.5472967071359946e-05, "loss": 0.8325, "step": 79060 }, { "epoch": 6.91, "learning_rate": 1.546859987771858e-05, "loss": 0.7637, "step": 79070 }, { "epoch": 6.91, "learning_rate": 1.5464232684077213e-05, "loss": 0.9311, "step": 79080 }, { "epoch": 6.91, "learning_rate": 1.5459865490435846e-05, "loss": 0.8314, "step": 79090 }, { "epoch": 6.91, "learning_rate": 1.545549829679448e-05, "loss": 0.8664, "step": 79100 }, { "epoch": 6.91, "learning_rate": 1.5451131103153116e-05, "loss": 0.8874, "step": 79110 }, { "epoch": 6.91, "learning_rate": 1.544676390951175e-05, "loss": 0.8579, "step": 79120 }, { "epoch": 6.91, "learning_rate": 1.5442396715870382e-05, "loss": 0.8853, "step": 79130 }, { "epoch": 6.91, "learning_rate": 1.5438029522229015e-05, "loss": 0.9375, "step": 79140 }, { "epoch": 6.91, "learning_rate": 1.5433662328587652e-05, "loss": 0.8561, "step": 79150 }, { "epoch": 6.91, "learning_rate": 1.5429295134946285e-05, "loss": 0.9253, "step": 79160 }, { "epoch": 6.92, "learning_rate": 1.5424927941304918e-05, "loss": 0.901, "step": 79170 }, { "epoch": 6.92, "learning_rate": 1.542056074766355e-05, "loss": 0.8779, "step": 79180 }, { "epoch": 6.92, "learning_rate": 1.5416193554022188e-05, "loss": 0.8702, "step": 79190 }, { "epoch": 6.92, "learning_rate": 1.541182636038082e-05, "loss": 0.9193, "step": 79200 }, { "epoch": 6.92, "learning_rate": 1.5407459166739454e-05, "loss": 0.8578, "step": 79210 }, { "epoch": 6.92, "learning_rate": 1.5403091973098088e-05, "loss": 0.7789, "step": 79220 }, { "epoch": 6.92, "learning_rate": 1.539872477945672e-05, "loss": 0.9233, "step": 79230 }, { "epoch": 6.92, "learning_rate": 1.5394357585815354e-05, "loss": 0.8907, "step": 79240 }, { "epoch": 6.92, "learning_rate": 1.538999039217399e-05, "loss": 0.9396, "step": 79250 }, { "epoch": 6.92, "learning_rate": 1.5385623198532624e-05, "loss": 0.8325, "step": 79260 }, { "epoch": 6.92, "learning_rate": 1.5381256004891257e-05, "loss": 0.9402, "step": 79270 }, { "epoch": 6.92, "learning_rate": 1.5376888811249894e-05, "loss": 0.8434, "step": 79280 }, { "epoch": 6.93, "learning_rate": 1.5372521617608527e-05, "loss": 0.8962, "step": 79290 }, { "epoch": 6.93, "learning_rate": 1.536815442396716e-05, "loss": 0.827, "step": 79300 }, { "epoch": 6.93, "learning_rate": 1.5363787230325793e-05, "loss": 0.9542, "step": 79310 }, { "epoch": 6.93, "learning_rate": 1.5359420036684426e-05, "loss": 0.8419, "step": 79320 }, { "epoch": 6.93, "learning_rate": 1.535505284304306e-05, "loss": 0.781, "step": 79330 }, { "epoch": 6.93, "learning_rate": 1.5350685649401693e-05, "loss": 1.0081, "step": 79340 }, { "epoch": 6.93, "learning_rate": 1.534631845576033e-05, "loss": 0.8086, "step": 79350 }, { "epoch": 6.93, "learning_rate": 1.5341951262118963e-05, "loss": 0.8862, "step": 79360 }, { "epoch": 6.93, "learning_rate": 1.53375840684776e-05, "loss": 0.7843, "step": 79370 }, { "epoch": 6.93, "learning_rate": 1.5333216874836232e-05, "loss": 0.8925, "step": 79380 }, { "epoch": 6.93, "learning_rate": 1.5328849681194866e-05, "loss": 0.8238, "step": 79390 }, { "epoch": 6.94, "learning_rate": 1.53244824875535e-05, "loss": 0.9223, "step": 79400 }, { "epoch": 6.94, "learning_rate": 1.5320115293912132e-05, "loss": 0.8474, "step": 79410 }, { "epoch": 6.94, "learning_rate": 1.5315748100270765e-05, "loss": 0.8225, "step": 79420 }, { "epoch": 6.94, "learning_rate": 1.53113809066294e-05, "loss": 0.9586, "step": 79430 }, { "epoch": 6.94, "learning_rate": 1.5307013712988035e-05, "loss": 0.9034, "step": 79440 }, { "epoch": 6.94, "learning_rate": 1.5302646519346668e-05, "loss": 0.902, "step": 79450 }, { "epoch": 6.94, "learning_rate": 1.52982793257053e-05, "loss": 0.835, "step": 79460 }, { "epoch": 6.94, "learning_rate": 1.5293912132063938e-05, "loss": 0.7401, "step": 79470 }, { "epoch": 6.94, "learning_rate": 1.528954493842257e-05, "loss": 0.8495, "step": 79480 }, { "epoch": 6.94, "learning_rate": 1.5285177744781204e-05, "loss": 0.8578, "step": 79490 }, { "epoch": 6.94, "learning_rate": 1.5280810551139838e-05, "loss": 0.8443, "step": 79500 }, { "epoch": 6.94, "learning_rate": 1.527644335749847e-05, "loss": 0.9013, "step": 79510 }, { "epoch": 6.95, "learning_rate": 1.5272076163857107e-05, "loss": 0.8062, "step": 79520 }, { "epoch": 6.95, "learning_rate": 1.526770897021574e-05, "loss": 0.8641, "step": 79530 }, { "epoch": 6.95, "learning_rate": 1.5263341776574374e-05, "loss": 0.8283, "step": 79540 }, { "epoch": 6.95, "learning_rate": 1.5258974582933007e-05, "loss": 0.8473, "step": 79550 }, { "epoch": 6.95, "learning_rate": 1.5254607389291642e-05, "loss": 0.9566, "step": 79560 }, { "epoch": 6.95, "learning_rate": 1.5250240195650275e-05, "loss": 0.9049, "step": 79570 }, { "epoch": 6.95, "learning_rate": 1.5245873002008908e-05, "loss": 0.8403, "step": 79580 }, { "epoch": 6.95, "learning_rate": 1.5241505808367543e-05, "loss": 0.8495, "step": 79590 }, { "epoch": 6.95, "learning_rate": 1.5237138614726176e-05, "loss": 0.9146, "step": 79600 }, { "epoch": 6.95, "learning_rate": 1.5232771421084813e-05, "loss": 0.8736, "step": 79610 }, { "epoch": 6.95, "learning_rate": 1.5228404227443446e-05, "loss": 0.8821, "step": 79620 }, { "epoch": 6.96, "learning_rate": 1.522403703380208e-05, "loss": 0.9869, "step": 79630 }, { "epoch": 6.96, "learning_rate": 1.5219669840160714e-05, "loss": 0.6989, "step": 79640 }, { "epoch": 6.96, "learning_rate": 1.5215302646519348e-05, "loss": 0.88, "step": 79650 }, { "epoch": 6.96, "learning_rate": 1.521093545287798e-05, "loss": 0.7451, "step": 79660 }, { "epoch": 6.96, "learning_rate": 1.5206568259236614e-05, "loss": 0.7853, "step": 79670 }, { "epoch": 6.96, "learning_rate": 1.5202201065595247e-05, "loss": 0.8414, "step": 79680 }, { "epoch": 6.96, "learning_rate": 1.5197833871953884e-05, "loss": 0.9237, "step": 79690 }, { "epoch": 6.96, "learning_rate": 1.5193466678312519e-05, "loss": 0.8215, "step": 79700 }, { "epoch": 6.96, "learning_rate": 1.5189099484671152e-05, "loss": 0.8916, "step": 79710 }, { "epoch": 6.96, "learning_rate": 1.5184732291029785e-05, "loss": 0.8309, "step": 79720 }, { "epoch": 6.96, "learning_rate": 1.5180365097388418e-05, "loss": 0.956, "step": 79730 }, { "epoch": 6.96, "learning_rate": 1.5175997903747053e-05, "loss": 0.7258, "step": 79740 }, { "epoch": 6.97, "learning_rate": 1.5171630710105686e-05, "loss": 0.9366, "step": 79750 }, { "epoch": 6.97, "learning_rate": 1.516726351646432e-05, "loss": 0.8205, "step": 79760 }, { "epoch": 6.97, "learning_rate": 1.5162896322822956e-05, "loss": 0.8208, "step": 79770 }, { "epoch": 6.97, "learning_rate": 1.515852912918159e-05, "loss": 0.834, "step": 79780 }, { "epoch": 6.97, "learning_rate": 1.5154161935540223e-05, "loss": 0.8977, "step": 79790 }, { "epoch": 6.97, "learning_rate": 1.5149794741898857e-05, "loss": 0.8762, "step": 79800 }, { "epoch": 6.97, "learning_rate": 1.514542754825749e-05, "loss": 0.8483, "step": 79810 }, { "epoch": 6.97, "learning_rate": 1.5141060354616124e-05, "loss": 0.8313, "step": 79820 }, { "epoch": 6.97, "learning_rate": 1.5136693160974757e-05, "loss": 0.9631, "step": 79830 }, { "epoch": 6.97, "learning_rate": 1.5132325967333392e-05, "loss": 0.9458, "step": 79840 }, { "epoch": 6.97, "learning_rate": 1.5127958773692027e-05, "loss": 0.7959, "step": 79850 }, { "epoch": 6.98, "learning_rate": 1.5123591580050662e-05, "loss": 0.9817, "step": 79860 }, { "epoch": 6.98, "learning_rate": 1.5119224386409295e-05, "loss": 0.8067, "step": 79870 }, { "epoch": 6.98, "learning_rate": 1.5114857192767928e-05, "loss": 0.9254, "step": 79880 }, { "epoch": 6.98, "learning_rate": 1.5110489999126561e-05, "loss": 1.0353, "step": 79890 }, { "epoch": 6.98, "learning_rate": 1.5106122805485196e-05, "loss": 0.9615, "step": 79900 }, { "epoch": 6.98, "learning_rate": 1.510175561184383e-05, "loss": 0.8383, "step": 79910 }, { "epoch": 6.98, "learning_rate": 1.5097388418202463e-05, "loss": 0.7644, "step": 79920 }, { "epoch": 6.98, "learning_rate": 1.50930212245611e-05, "loss": 0.9419, "step": 79930 }, { "epoch": 6.98, "learning_rate": 1.5088654030919732e-05, "loss": 1.0161, "step": 79940 }, { "epoch": 6.98, "learning_rate": 1.5084286837278366e-05, "loss": 0.8872, "step": 79950 }, { "epoch": 6.98, "learning_rate": 1.5079919643637e-05, "loss": 0.8362, "step": 79960 }, { "epoch": 6.98, "learning_rate": 1.5075552449995634e-05, "loss": 0.8805, "step": 79970 }, { "epoch": 6.99, "learning_rate": 1.5071185256354267e-05, "loss": 0.9978, "step": 79980 }, { "epoch": 6.99, "learning_rate": 1.50668180627129e-05, "loss": 0.8735, "step": 79990 }, { "epoch": 6.99, "learning_rate": 1.5062450869071535e-05, "loss": 0.9091, "step": 80000 }, { "epoch": 6.99, "eval_accuracy": 0.5740395008352167, "eval_loss": 0.8836946487426758, "eval_runtime": 84.1664, "eval_samples_per_second": 120.915, "eval_steps_per_second": 15.125, "step": 80000 }, { "epoch": 6.99, "learning_rate": 1.5058083675430168e-05, "loss": 0.8624, "step": 80010 }, { "epoch": 6.99, "learning_rate": 1.5053716481788805e-05, "loss": 0.8827, "step": 80020 }, { "epoch": 6.99, "learning_rate": 1.5049349288147438e-05, "loss": 0.8518, "step": 80030 }, { "epoch": 6.99, "learning_rate": 1.5044982094506071e-05, "loss": 0.863, "step": 80040 }, { "epoch": 6.99, "learning_rate": 1.5040614900864704e-05, "loss": 0.998, "step": 80050 }, { "epoch": 6.99, "learning_rate": 1.503624770722334e-05, "loss": 0.8798, "step": 80060 }, { "epoch": 6.99, "learning_rate": 1.5031880513581973e-05, "loss": 0.8039, "step": 80070 }, { "epoch": 6.99, "learning_rate": 1.5027513319940606e-05, "loss": 1.0405, "step": 80080 }, { "epoch": 7.0, "learning_rate": 1.5023146126299239e-05, "loss": 0.8597, "step": 80090 }, { "epoch": 7.0, "learning_rate": 1.5018778932657876e-05, "loss": 0.9837, "step": 80100 }, { "epoch": 7.0, "learning_rate": 1.5014411739016509e-05, "loss": 0.9001, "step": 80110 }, { "epoch": 7.0, "learning_rate": 1.5010044545375144e-05, "loss": 0.9457, "step": 80120 }, { "epoch": 7.0, "learning_rate": 1.5005677351733777e-05, "loss": 0.9808, "step": 80130 }, { "epoch": 7.0, "learning_rate": 1.500131015809241e-05, "loss": 0.7513, "step": 80140 }, { "epoch": 7.0, "learning_rate": 1.4996942964451043e-05, "loss": 0.9422, "step": 80150 }, { "epoch": 7.0, "learning_rate": 1.4992575770809678e-05, "loss": 0.8976, "step": 80160 }, { "epoch": 7.0, "learning_rate": 1.4988208577168311e-05, "loss": 0.7991, "step": 80170 }, { "epoch": 7.0, "learning_rate": 1.4983841383526948e-05, "loss": 0.89, "step": 80180 }, { "epoch": 7.0, "learning_rate": 1.4979474189885581e-05, "loss": 0.8943, "step": 80190 }, { "epoch": 7.0, "learning_rate": 1.4975106996244214e-05, "loss": 0.8776, "step": 80200 }, { "epoch": 7.01, "learning_rate": 1.4970739802602848e-05, "loss": 0.8711, "step": 80210 }, { "epoch": 7.01, "learning_rate": 1.4966372608961482e-05, "loss": 0.7418, "step": 80220 }, { "epoch": 7.01, "learning_rate": 1.4962005415320116e-05, "loss": 0.9364, "step": 80230 }, { "epoch": 7.01, "learning_rate": 1.4957638221678749e-05, "loss": 0.802, "step": 80240 }, { "epoch": 7.01, "learning_rate": 1.4953271028037382e-05, "loss": 0.8602, "step": 80250 }, { "epoch": 7.01, "learning_rate": 1.4948903834396019e-05, "loss": 0.7772, "step": 80260 }, { "epoch": 7.01, "learning_rate": 1.4944536640754654e-05, "loss": 0.8466, "step": 80270 }, { "epoch": 7.01, "learning_rate": 1.4940169447113287e-05, "loss": 0.8186, "step": 80280 }, { "epoch": 7.01, "learning_rate": 1.493580225347192e-05, "loss": 0.9439, "step": 80290 }, { "epoch": 7.01, "learning_rate": 1.4931435059830553e-05, "loss": 0.797, "step": 80300 }, { "epoch": 7.01, "learning_rate": 1.4927067866189186e-05, "loss": 0.8842, "step": 80310 }, { "epoch": 7.02, "learning_rate": 1.4922700672547821e-05, "loss": 0.9396, "step": 80320 }, { "epoch": 7.02, "learning_rate": 1.4918333478906454e-05, "loss": 0.8866, "step": 80330 }, { "epoch": 7.02, "learning_rate": 1.4913966285265088e-05, "loss": 0.8618, "step": 80340 }, { "epoch": 7.02, "learning_rate": 1.4909599091623724e-05, "loss": 0.7788, "step": 80350 }, { "epoch": 7.02, "learning_rate": 1.4905231897982357e-05, "loss": 0.8077, "step": 80360 }, { "epoch": 7.02, "learning_rate": 1.4900864704340992e-05, "loss": 0.7306, "step": 80370 }, { "epoch": 7.02, "learning_rate": 1.4896497510699626e-05, "loss": 0.8554, "step": 80380 }, { "epoch": 7.02, "learning_rate": 1.4892130317058259e-05, "loss": 0.9497, "step": 80390 }, { "epoch": 7.02, "learning_rate": 1.4887763123416892e-05, "loss": 0.7697, "step": 80400 }, { "epoch": 7.02, "learning_rate": 1.4883395929775525e-05, "loss": 0.8193, "step": 80410 }, { "epoch": 7.02, "learning_rate": 1.487902873613416e-05, "loss": 0.9081, "step": 80420 }, { "epoch": 7.03, "learning_rate": 1.4874661542492797e-05, "loss": 0.9023, "step": 80430 }, { "epoch": 7.03, "learning_rate": 1.487029434885143e-05, "loss": 0.9479, "step": 80440 }, { "epoch": 7.03, "learning_rate": 1.4865927155210063e-05, "loss": 0.8651, "step": 80450 }, { "epoch": 7.03, "learning_rate": 1.4861559961568696e-05, "loss": 0.905, "step": 80460 }, { "epoch": 7.03, "learning_rate": 1.4857192767927331e-05, "loss": 0.9537, "step": 80470 }, { "epoch": 7.03, "learning_rate": 1.4852825574285964e-05, "loss": 0.8254, "step": 80480 }, { "epoch": 7.03, "learning_rate": 1.4848458380644598e-05, "loss": 0.838, "step": 80490 }, { "epoch": 7.03, "learning_rate": 1.484409118700323e-05, "loss": 0.8362, "step": 80500 }, { "epoch": 7.03, "learning_rate": 1.4839723993361867e-05, "loss": 0.9832, "step": 80510 }, { "epoch": 7.03, "learning_rate": 1.48353567997205e-05, "loss": 0.778, "step": 80520 }, { "epoch": 7.03, "learning_rate": 1.4830989606079135e-05, "loss": 0.8155, "step": 80530 }, { "epoch": 7.03, "learning_rate": 1.4826622412437769e-05, "loss": 0.9683, "step": 80540 }, { "epoch": 7.04, "learning_rate": 1.4822255218796402e-05, "loss": 0.8294, "step": 80550 }, { "epoch": 7.04, "learning_rate": 1.4817888025155035e-05, "loss": 0.8467, "step": 80560 }, { "epoch": 7.04, "learning_rate": 1.481352083151367e-05, "loss": 0.8438, "step": 80570 }, { "epoch": 7.04, "learning_rate": 1.4809153637872303e-05, "loss": 0.7572, "step": 80580 }, { "epoch": 7.04, "learning_rate": 1.480478644423094e-05, "loss": 0.7504, "step": 80590 }, { "epoch": 7.04, "learning_rate": 1.4800419250589573e-05, "loss": 0.9171, "step": 80600 }, { "epoch": 7.04, "learning_rate": 1.4796052056948206e-05, "loss": 0.9013, "step": 80610 }, { "epoch": 7.04, "learning_rate": 1.479168486330684e-05, "loss": 0.8931, "step": 80620 }, { "epoch": 7.04, "learning_rate": 1.4787317669665474e-05, "loss": 0.7945, "step": 80630 }, { "epoch": 7.04, "learning_rate": 1.4782950476024107e-05, "loss": 0.8592, "step": 80640 }, { "epoch": 7.04, "learning_rate": 1.477858328238274e-05, "loss": 0.8559, "step": 80650 }, { "epoch": 7.05, "learning_rate": 1.4774216088741374e-05, "loss": 0.9519, "step": 80660 }, { "epoch": 7.05, "learning_rate": 1.476984889510001e-05, "loss": 0.8289, "step": 80670 }, { "epoch": 7.05, "learning_rate": 1.4765481701458644e-05, "loss": 1.0453, "step": 80680 }, { "epoch": 7.05, "learning_rate": 1.4761114507817279e-05, "loss": 0.9239, "step": 80690 }, { "epoch": 7.05, "learning_rate": 1.4756747314175912e-05, "loss": 0.7435, "step": 80700 }, { "epoch": 7.05, "learning_rate": 1.4752380120534545e-05, "loss": 0.8549, "step": 80710 }, { "epoch": 7.05, "learning_rate": 1.4748012926893178e-05, "loss": 0.8464, "step": 80720 }, { "epoch": 7.05, "learning_rate": 1.4743645733251813e-05, "loss": 0.9447, "step": 80730 }, { "epoch": 7.05, "learning_rate": 1.4739278539610446e-05, "loss": 0.8686, "step": 80740 }, { "epoch": 7.05, "learning_rate": 1.473491134596908e-05, "loss": 0.9758, "step": 80750 }, { "epoch": 7.05, "learning_rate": 1.4730544152327716e-05, "loss": 0.8655, "step": 80760 }, { "epoch": 7.05, "learning_rate": 1.472617695868635e-05, "loss": 0.8173, "step": 80770 }, { "epoch": 7.06, "learning_rate": 1.4721809765044983e-05, "loss": 0.8547, "step": 80780 }, { "epoch": 7.06, "learning_rate": 1.4717442571403617e-05, "loss": 0.8924, "step": 80790 }, { "epoch": 7.06, "learning_rate": 1.471307537776225e-05, "loss": 0.8063, "step": 80800 }, { "epoch": 7.06, "learning_rate": 1.4708708184120884e-05, "loss": 0.8579, "step": 80810 }, { "epoch": 7.06, "learning_rate": 1.4704340990479517e-05, "loss": 0.9205, "step": 80820 }, { "epoch": 7.06, "learning_rate": 1.4699973796838152e-05, "loss": 0.8016, "step": 80830 }, { "epoch": 7.06, "learning_rate": 1.4695606603196787e-05, "loss": 0.8678, "step": 80840 }, { "epoch": 7.06, "learning_rate": 1.4691239409555422e-05, "loss": 1.0111, "step": 80850 }, { "epoch": 7.06, "learning_rate": 1.4686872215914055e-05, "loss": 0.9698, "step": 80860 }, { "epoch": 7.06, "learning_rate": 1.4682505022272688e-05, "loss": 0.8882, "step": 80870 }, { "epoch": 7.06, "learning_rate": 1.4678137828631321e-05, "loss": 0.9352, "step": 80880 }, { "epoch": 7.07, "learning_rate": 1.4673770634989956e-05, "loss": 0.9535, "step": 80890 }, { "epoch": 7.07, "learning_rate": 1.466940344134859e-05, "loss": 0.9282, "step": 80900 }, { "epoch": 7.07, "learning_rate": 1.4665036247707223e-05, "loss": 0.8376, "step": 80910 }, { "epoch": 7.07, "learning_rate": 1.466066905406586e-05, "loss": 0.9049, "step": 80920 }, { "epoch": 7.07, "learning_rate": 1.4656301860424492e-05, "loss": 0.9621, "step": 80930 }, { "epoch": 7.07, "learning_rate": 1.4651934666783126e-05, "loss": 0.9431, "step": 80940 }, { "epoch": 7.07, "learning_rate": 1.464756747314176e-05, "loss": 0.8286, "step": 80950 }, { "epoch": 7.07, "learning_rate": 1.4643200279500394e-05, "loss": 0.8153, "step": 80960 }, { "epoch": 7.07, "learning_rate": 1.4638833085859027e-05, "loss": 0.7773, "step": 80970 }, { "epoch": 7.07, "learning_rate": 1.463446589221766e-05, "loss": 0.9341, "step": 80980 }, { "epoch": 7.07, "learning_rate": 1.4630098698576295e-05, "loss": 0.8297, "step": 80990 }, { "epoch": 7.07, "learning_rate": 1.4625731504934932e-05, "loss": 0.8766, "step": 81000 }, { "epoch": 7.08, "learning_rate": 1.4621364311293565e-05, "loss": 0.7087, "step": 81010 }, { "epoch": 7.08, "learning_rate": 1.4616997117652198e-05, "loss": 0.9686, "step": 81020 }, { "epoch": 7.08, "learning_rate": 1.4612629924010831e-05, "loss": 0.7876, "step": 81030 }, { "epoch": 7.08, "learning_rate": 1.4608262730369464e-05, "loss": 0.7951, "step": 81040 }, { "epoch": 7.08, "learning_rate": 1.46038955367281e-05, "loss": 0.897, "step": 81050 }, { "epoch": 7.08, "learning_rate": 1.4599528343086733e-05, "loss": 1.0029, "step": 81060 }, { "epoch": 7.08, "learning_rate": 1.4595161149445366e-05, "loss": 0.844, "step": 81070 }, { "epoch": 7.08, "learning_rate": 1.4590793955804002e-05, "loss": 0.7635, "step": 81080 }, { "epoch": 7.08, "learning_rate": 1.4586426762162636e-05, "loss": 0.8458, "step": 81090 }, { "epoch": 7.08, "learning_rate": 1.458205956852127e-05, "loss": 0.8996, "step": 81100 }, { "epoch": 7.08, "learning_rate": 1.4577692374879904e-05, "loss": 0.7617, "step": 81110 }, { "epoch": 7.09, "learning_rate": 1.4573325181238537e-05, "loss": 0.9932, "step": 81120 }, { "epoch": 7.09, "learning_rate": 1.456895798759717e-05, "loss": 0.9116, "step": 81130 }, { "epoch": 7.09, "learning_rate": 1.4564590793955803e-05, "loss": 0.9602, "step": 81140 }, { "epoch": 7.09, "learning_rate": 1.4560223600314438e-05, "loss": 0.9909, "step": 81150 }, { "epoch": 7.09, "learning_rate": 1.4555856406673071e-05, "loss": 0.8001, "step": 81160 }, { "epoch": 7.09, "learning_rate": 1.4551489213031708e-05, "loss": 0.817, "step": 81170 }, { "epoch": 7.09, "learning_rate": 1.4547122019390341e-05, "loss": 0.7725, "step": 81180 }, { "epoch": 7.09, "learning_rate": 1.4542754825748974e-05, "loss": 0.9607, "step": 81190 }, { "epoch": 7.09, "learning_rate": 1.453838763210761e-05, "loss": 0.8018, "step": 81200 }, { "epoch": 7.09, "learning_rate": 1.4534020438466242e-05, "loss": 0.9003, "step": 81210 }, { "epoch": 7.09, "learning_rate": 1.4529653244824876e-05, "loss": 0.9196, "step": 81220 }, { "epoch": 7.09, "learning_rate": 1.4525286051183509e-05, "loss": 0.8446, "step": 81230 }, { "epoch": 7.1, "learning_rate": 1.4520918857542142e-05, "loss": 0.9423, "step": 81240 }, { "epoch": 7.1, "learning_rate": 1.4516551663900779e-05, "loss": 0.8726, "step": 81250 }, { "epoch": 7.1, "learning_rate": 1.4512184470259414e-05, "loss": 0.8666, "step": 81260 }, { "epoch": 7.1, "learning_rate": 1.4507817276618047e-05, "loss": 0.8842, "step": 81270 }, { "epoch": 7.1, "learning_rate": 1.450345008297668e-05, "loss": 0.8682, "step": 81280 }, { "epoch": 7.1, "learning_rate": 1.4499082889335313e-05, "loss": 0.9058, "step": 81290 }, { "epoch": 7.1, "learning_rate": 1.4494715695693948e-05, "loss": 0.907, "step": 81300 }, { "epoch": 7.1, "learning_rate": 1.4490348502052581e-05, "loss": 0.8484, "step": 81310 }, { "epoch": 7.1, "learning_rate": 1.4485981308411214e-05, "loss": 0.817, "step": 81320 }, { "epoch": 7.1, "learning_rate": 1.4481614114769851e-05, "loss": 0.8639, "step": 81330 }, { "epoch": 7.1, "learning_rate": 1.4477246921128484e-05, "loss": 0.7158, "step": 81340 }, { "epoch": 7.11, "learning_rate": 1.4472879727487117e-05, "loss": 1.0229, "step": 81350 }, { "epoch": 7.11, "learning_rate": 1.4468512533845752e-05, "loss": 1.0482, "step": 81360 }, { "epoch": 7.11, "learning_rate": 1.4464145340204386e-05, "loss": 0.8863, "step": 81370 }, { "epoch": 7.11, "learning_rate": 1.4459778146563019e-05, "loss": 0.8736, "step": 81380 }, { "epoch": 7.11, "learning_rate": 1.4455410952921652e-05, "loss": 0.848, "step": 81390 }, { "epoch": 7.11, "learning_rate": 1.4451043759280287e-05, "loss": 0.8531, "step": 81400 }, { "epoch": 7.11, "learning_rate": 1.4446676565638922e-05, "loss": 0.859, "step": 81410 }, { "epoch": 7.11, "learning_rate": 1.4442309371997557e-05, "loss": 0.9786, "step": 81420 }, { "epoch": 7.11, "learning_rate": 1.443794217835619e-05, "loss": 0.8075, "step": 81430 }, { "epoch": 7.11, "learning_rate": 1.4433574984714823e-05, "loss": 0.9605, "step": 81440 }, { "epoch": 7.11, "learning_rate": 1.4429207791073456e-05, "loss": 0.9307, "step": 81450 }, { "epoch": 7.12, "learning_rate": 1.4424840597432091e-05, "loss": 0.9399, "step": 81460 }, { "epoch": 7.12, "learning_rate": 1.4420473403790724e-05, "loss": 0.8244, "step": 81470 }, { "epoch": 7.12, "learning_rate": 1.4416106210149358e-05, "loss": 0.8474, "step": 81480 }, { "epoch": 7.12, "learning_rate": 1.441173901650799e-05, "loss": 0.8172, "step": 81490 }, { "epoch": 7.12, "learning_rate": 1.4407371822866627e-05, "loss": 0.8155, "step": 81500 }, { "epoch": 7.12, "learning_rate": 1.440300462922526e-05, "loss": 0.9044, "step": 81510 }, { "epoch": 7.12, "learning_rate": 1.4398637435583895e-05, "loss": 0.928, "step": 81520 }, { "epoch": 7.12, "learning_rate": 1.4394270241942529e-05, "loss": 0.9296, "step": 81530 }, { "epoch": 7.12, "learning_rate": 1.4389903048301162e-05, "loss": 0.7607, "step": 81540 }, { "epoch": 7.12, "learning_rate": 1.4385535854659795e-05, "loss": 0.8285, "step": 81550 }, { "epoch": 7.12, "learning_rate": 1.438116866101843e-05, "loss": 0.8134, "step": 81560 }, { "epoch": 7.12, "learning_rate": 1.4376801467377063e-05, "loss": 0.9304, "step": 81570 }, { "epoch": 7.13, "learning_rate": 1.43724342737357e-05, "loss": 0.8363, "step": 81580 }, { "epoch": 7.13, "learning_rate": 1.4368067080094333e-05, "loss": 0.8372, "step": 81590 }, { "epoch": 7.13, "learning_rate": 1.4363699886452966e-05, "loss": 0.773, "step": 81600 }, { "epoch": 7.13, "learning_rate": 1.43593326928116e-05, "loss": 0.848, "step": 81610 }, { "epoch": 7.13, "learning_rate": 1.4354965499170234e-05, "loss": 0.7567, "step": 81620 }, { "epoch": 7.13, "learning_rate": 1.4350598305528867e-05, "loss": 0.8421, "step": 81630 }, { "epoch": 7.13, "learning_rate": 1.43462311118875e-05, "loss": 0.766, "step": 81640 }, { "epoch": 7.13, "learning_rate": 1.4341863918246134e-05, "loss": 0.8717, "step": 81650 }, { "epoch": 7.13, "learning_rate": 1.433749672460477e-05, "loss": 0.8831, "step": 81660 }, { "epoch": 7.13, "learning_rate": 1.4333129530963405e-05, "loss": 0.8999, "step": 81670 }, { "epoch": 7.13, "learning_rate": 1.4328762337322039e-05, "loss": 0.8693, "step": 81680 }, { "epoch": 7.14, "learning_rate": 1.4324395143680672e-05, "loss": 0.9006, "step": 81690 }, { "epoch": 7.14, "learning_rate": 1.4320027950039305e-05, "loss": 0.9501, "step": 81700 }, { "epoch": 7.14, "learning_rate": 1.4315660756397938e-05, "loss": 0.8574, "step": 81710 }, { "epoch": 7.14, "learning_rate": 1.4311293562756573e-05, "loss": 0.8928, "step": 81720 }, { "epoch": 7.14, "learning_rate": 1.4306926369115206e-05, "loss": 0.8957, "step": 81730 }, { "epoch": 7.14, "learning_rate": 1.4302559175473843e-05, "loss": 0.9289, "step": 81740 }, { "epoch": 7.14, "learning_rate": 1.4298191981832476e-05, "loss": 0.8929, "step": 81750 }, { "epoch": 7.14, "learning_rate": 1.429382478819111e-05, "loss": 0.7489, "step": 81760 }, { "epoch": 7.14, "learning_rate": 1.4289457594549744e-05, "loss": 0.8785, "step": 81770 }, { "epoch": 7.14, "learning_rate": 1.4285090400908377e-05, "loss": 0.9023, "step": 81780 }, { "epoch": 7.14, "learning_rate": 1.428072320726701e-05, "loss": 0.9246, "step": 81790 }, { "epoch": 7.14, "learning_rate": 1.4276356013625644e-05, "loss": 0.8908, "step": 81800 }, { "epoch": 7.15, "learning_rate": 1.4271988819984277e-05, "loss": 0.831, "step": 81810 }, { "epoch": 7.15, "learning_rate": 1.4267621626342914e-05, "loss": 0.8775, "step": 81820 }, { "epoch": 7.15, "learning_rate": 1.4263254432701548e-05, "loss": 0.9145, "step": 81830 }, { "epoch": 7.15, "learning_rate": 1.4258887239060182e-05, "loss": 0.9068, "step": 81840 }, { "epoch": 7.15, "learning_rate": 1.4254520045418815e-05, "loss": 0.9093, "step": 81850 }, { "epoch": 7.15, "learning_rate": 1.4250152851777448e-05, "loss": 0.9638, "step": 81860 }, { "epoch": 7.15, "learning_rate": 1.4245785658136083e-05, "loss": 0.8564, "step": 81870 }, { "epoch": 7.15, "learning_rate": 1.4241418464494716e-05, "loss": 0.9311, "step": 81880 }, { "epoch": 7.15, "learning_rate": 1.423705127085335e-05, "loss": 0.8612, "step": 81890 }, { "epoch": 7.15, "learning_rate": 1.4232684077211983e-05, "loss": 0.8499, "step": 81900 }, { "epoch": 7.15, "learning_rate": 1.4228316883570619e-05, "loss": 0.9549, "step": 81910 }, { "epoch": 7.16, "learning_rate": 1.4223949689929252e-05, "loss": 0.9511, "step": 81920 }, { "epoch": 7.16, "learning_rate": 1.4219582496287887e-05, "loss": 0.9803, "step": 81930 }, { "epoch": 7.16, "learning_rate": 1.421521530264652e-05, "loss": 0.9088, "step": 81940 }, { "epoch": 7.16, "learning_rate": 1.4210848109005154e-05, "loss": 0.9222, "step": 81950 }, { "epoch": 7.16, "learning_rate": 1.4206480915363787e-05, "loss": 0.8626, "step": 81960 }, { "epoch": 7.16, "learning_rate": 1.4202113721722422e-05, "loss": 0.8483, "step": 81970 }, { "epoch": 7.16, "learning_rate": 1.4197746528081055e-05, "loss": 0.9302, "step": 81980 }, { "epoch": 7.16, "learning_rate": 1.4193379334439692e-05, "loss": 0.9581, "step": 81990 }, { "epoch": 7.16, "learning_rate": 1.4189012140798325e-05, "loss": 0.918, "step": 82000 }, { "epoch": 7.16, "learning_rate": 1.4184644947156958e-05, "loss": 0.8597, "step": 82010 }, { "epoch": 7.16, "learning_rate": 1.4180277753515591e-05, "loss": 0.9217, "step": 82020 }, { "epoch": 7.16, "learning_rate": 1.4175910559874226e-05, "loss": 0.7978, "step": 82030 }, { "epoch": 7.17, "learning_rate": 1.417154336623286e-05, "loss": 0.9433, "step": 82040 }, { "epoch": 7.17, "learning_rate": 1.4167176172591492e-05, "loss": 0.9607, "step": 82050 }, { "epoch": 7.17, "learning_rate": 1.4162808978950126e-05, "loss": 0.8769, "step": 82060 }, { "epoch": 7.17, "learning_rate": 1.4158441785308762e-05, "loss": 0.8446, "step": 82070 }, { "epoch": 7.17, "learning_rate": 1.4154074591667395e-05, "loss": 0.8769, "step": 82080 }, { "epoch": 7.17, "learning_rate": 1.414970739802603e-05, "loss": 0.7533, "step": 82090 }, { "epoch": 7.17, "learning_rate": 1.4145340204384664e-05, "loss": 0.8921, "step": 82100 }, { "epoch": 7.17, "learning_rate": 1.4140973010743297e-05, "loss": 0.8849, "step": 82110 }, { "epoch": 7.17, "learning_rate": 1.413660581710193e-05, "loss": 0.9489, "step": 82120 }, { "epoch": 7.17, "learning_rate": 1.4132238623460565e-05, "loss": 0.879, "step": 82130 }, { "epoch": 7.17, "learning_rate": 1.4127871429819198e-05, "loss": 0.8047, "step": 82140 }, { "epoch": 7.18, "learning_rate": 1.4123504236177835e-05, "loss": 0.8436, "step": 82150 }, { "epoch": 7.18, "learning_rate": 1.4119137042536468e-05, "loss": 0.8224, "step": 82160 }, { "epoch": 7.18, "learning_rate": 1.4114769848895101e-05, "loss": 0.8733, "step": 82170 }, { "epoch": 7.18, "learning_rate": 1.4110402655253734e-05, "loss": 0.8443, "step": 82180 }, { "epoch": 7.18, "learning_rate": 1.410603546161237e-05, "loss": 0.8823, "step": 82190 }, { "epoch": 7.18, "learning_rate": 1.4101668267971002e-05, "loss": 0.8213, "step": 82200 }, { "epoch": 7.18, "learning_rate": 1.4097301074329636e-05, "loss": 0.8704, "step": 82210 }, { "epoch": 7.18, "learning_rate": 1.4092933880688269e-05, "loss": 1.1062, "step": 82220 }, { "epoch": 7.18, "learning_rate": 1.4088566687046904e-05, "loss": 0.872, "step": 82230 }, { "epoch": 7.18, "learning_rate": 1.4084199493405539e-05, "loss": 0.8939, "step": 82240 }, { "epoch": 7.18, "learning_rate": 1.4079832299764173e-05, "loss": 0.8828, "step": 82250 }, { "epoch": 7.18, "learning_rate": 1.4075465106122807e-05, "loss": 0.9137, "step": 82260 }, { "epoch": 7.19, "learning_rate": 1.407109791248144e-05, "loss": 0.8445, "step": 82270 }, { "epoch": 7.19, "learning_rate": 1.4066730718840073e-05, "loss": 0.796, "step": 82280 }, { "epoch": 7.19, "learning_rate": 1.4062363525198708e-05, "loss": 0.8404, "step": 82290 }, { "epoch": 7.19, "learning_rate": 1.4057996331557341e-05, "loss": 0.9247, "step": 82300 }, { "epoch": 7.19, "learning_rate": 1.4053629137915974e-05, "loss": 0.7948, "step": 82310 }, { "epoch": 7.19, "learning_rate": 1.4049261944274611e-05, "loss": 0.8881, "step": 82320 }, { "epoch": 7.19, "learning_rate": 1.4044894750633244e-05, "loss": 0.9262, "step": 82330 }, { "epoch": 7.19, "learning_rate": 1.4040527556991877e-05, "loss": 0.9318, "step": 82340 }, { "epoch": 7.19, "learning_rate": 1.4036160363350512e-05, "loss": 0.9113, "step": 82350 }, { "epoch": 7.19, "learning_rate": 1.4031793169709145e-05, "loss": 0.8334, "step": 82360 }, { "epoch": 7.19, "learning_rate": 1.4027425976067779e-05, "loss": 0.7816, "step": 82370 }, { "epoch": 7.2, "learning_rate": 1.4023058782426412e-05, "loss": 0.947, "step": 82380 }, { "epoch": 7.2, "learning_rate": 1.4018691588785047e-05, "loss": 0.8527, "step": 82390 }, { "epoch": 7.2, "learning_rate": 1.4014324395143683e-05, "loss": 0.7924, "step": 82400 }, { "epoch": 7.2, "learning_rate": 1.4009957201502317e-05, "loss": 1.0049, "step": 82410 }, { "epoch": 7.2, "learning_rate": 1.400559000786095e-05, "loss": 0.8423, "step": 82420 }, { "epoch": 7.2, "learning_rate": 1.4001222814219583e-05, "loss": 0.7705, "step": 82430 }, { "epoch": 7.2, "learning_rate": 1.3996855620578216e-05, "loss": 0.8609, "step": 82440 }, { "epoch": 7.2, "learning_rate": 1.3992488426936851e-05, "loss": 0.8368, "step": 82450 }, { "epoch": 7.2, "learning_rate": 1.3988121233295484e-05, "loss": 0.8512, "step": 82460 }, { "epoch": 7.2, "learning_rate": 1.3983754039654117e-05, "loss": 0.8938, "step": 82470 }, { "epoch": 7.2, "learning_rate": 1.3979386846012754e-05, "loss": 0.9089, "step": 82480 }, { "epoch": 7.2, "learning_rate": 1.3975019652371387e-05, "loss": 0.7891, "step": 82490 }, { "epoch": 7.21, "learning_rate": 1.3970652458730022e-05, "loss": 0.9333, "step": 82500 }, { "epoch": 7.21, "learning_rate": 1.3966285265088655e-05, "loss": 0.9734, "step": 82510 }, { "epoch": 7.21, "learning_rate": 1.3961918071447289e-05, "loss": 0.8821, "step": 82520 }, { "epoch": 7.21, "learning_rate": 1.3957550877805922e-05, "loss": 0.7999, "step": 82530 }, { "epoch": 7.21, "learning_rate": 1.3953183684164555e-05, "loss": 0.9066, "step": 82540 }, { "epoch": 7.21, "learning_rate": 1.394881649052319e-05, "loss": 0.9067, "step": 82550 }, { "epoch": 7.21, "learning_rate": 1.3944449296881826e-05, "loss": 0.8943, "step": 82560 }, { "epoch": 7.21, "learning_rate": 1.394008210324046e-05, "loss": 0.7989, "step": 82570 }, { "epoch": 7.21, "learning_rate": 1.3935714909599093e-05, "loss": 0.8108, "step": 82580 }, { "epoch": 7.21, "learning_rate": 1.3931347715957726e-05, "loss": 0.8302, "step": 82590 }, { "epoch": 7.21, "learning_rate": 1.3926980522316361e-05, "loss": 0.8992, "step": 82600 }, { "epoch": 7.22, "learning_rate": 1.3922613328674994e-05, "loss": 0.8516, "step": 82610 }, { "epoch": 7.22, "learning_rate": 1.3918246135033627e-05, "loss": 0.7264, "step": 82620 }, { "epoch": 7.22, "learning_rate": 1.391387894139226e-05, "loss": 0.787, "step": 82630 }, { "epoch": 7.22, "learning_rate": 1.3909511747750894e-05, "loss": 0.9501, "step": 82640 }, { "epoch": 7.22, "learning_rate": 1.390514455410953e-05, "loss": 0.8231, "step": 82650 }, { "epoch": 7.22, "learning_rate": 1.3900777360468165e-05, "loss": 0.9062, "step": 82660 }, { "epoch": 7.22, "learning_rate": 1.3896410166826798e-05, "loss": 0.8834, "step": 82670 }, { "epoch": 7.22, "learning_rate": 1.3892042973185432e-05, "loss": 0.8015, "step": 82680 }, { "epoch": 7.22, "learning_rate": 1.3887675779544065e-05, "loss": 0.9102, "step": 82690 }, { "epoch": 7.22, "learning_rate": 1.38833085859027e-05, "loss": 0.9485, "step": 82700 }, { "epoch": 7.22, "learning_rate": 1.3878941392261333e-05, "loss": 0.9351, "step": 82710 }, { "epoch": 7.23, "learning_rate": 1.3874574198619966e-05, "loss": 0.7962, "step": 82720 }, { "epoch": 7.23, "learning_rate": 1.3870207004978603e-05, "loss": 0.9359, "step": 82730 }, { "epoch": 7.23, "learning_rate": 1.3865839811337236e-05, "loss": 0.9094, "step": 82740 }, { "epoch": 7.23, "learning_rate": 1.386147261769587e-05, "loss": 0.8486, "step": 82750 }, { "epoch": 7.23, "learning_rate": 1.3857105424054504e-05, "loss": 0.8034, "step": 82760 }, { "epoch": 7.23, "learning_rate": 1.3852738230413137e-05, "loss": 0.9047, "step": 82770 }, { "epoch": 7.23, "learning_rate": 1.384837103677177e-05, "loss": 0.8413, "step": 82780 }, { "epoch": 7.23, "learning_rate": 1.3844003843130404e-05, "loss": 0.9194, "step": 82790 }, { "epoch": 7.23, "learning_rate": 1.3839636649489039e-05, "loss": 0.7931, "step": 82800 }, { "epoch": 7.23, "learning_rate": 1.3835269455847673e-05, "loss": 0.7841, "step": 82810 }, { "epoch": 7.23, "learning_rate": 1.3830902262206308e-05, "loss": 0.9348, "step": 82820 }, { "epoch": 7.23, "learning_rate": 1.3826535068564942e-05, "loss": 0.8296, "step": 82830 }, { "epoch": 7.24, "learning_rate": 1.3822167874923575e-05, "loss": 0.7871, "step": 82840 }, { "epoch": 7.24, "learning_rate": 1.3817800681282208e-05, "loss": 0.935, "step": 82850 }, { "epoch": 7.24, "learning_rate": 1.3813433487640843e-05, "loss": 0.8681, "step": 82860 }, { "epoch": 7.24, "learning_rate": 1.3809066293999476e-05, "loss": 0.876, "step": 82870 }, { "epoch": 7.24, "learning_rate": 1.380469910035811e-05, "loss": 0.8765, "step": 82880 }, { "epoch": 7.24, "learning_rate": 1.3800331906716746e-05, "loss": 0.9242, "step": 82890 }, { "epoch": 7.24, "learning_rate": 1.3795964713075379e-05, "loss": 0.8819, "step": 82900 }, { "epoch": 7.24, "learning_rate": 1.3791597519434012e-05, "loss": 0.7769, "step": 82910 }, { "epoch": 7.24, "learning_rate": 1.3787230325792647e-05, "loss": 0.8791, "step": 82920 }, { "epoch": 7.24, "learning_rate": 1.378286313215128e-05, "loss": 0.7932, "step": 82930 }, { "epoch": 7.24, "learning_rate": 1.3778495938509914e-05, "loss": 0.8254, "step": 82940 }, { "epoch": 7.25, "learning_rate": 1.3774128744868547e-05, "loss": 0.9136, "step": 82950 }, { "epoch": 7.25, "learning_rate": 1.3769761551227182e-05, "loss": 0.9037, "step": 82960 }, { "epoch": 7.25, "learning_rate": 1.3765394357585815e-05, "loss": 0.784, "step": 82970 }, { "epoch": 7.25, "learning_rate": 1.3761027163944451e-05, "loss": 0.8929, "step": 82980 }, { "epoch": 7.25, "learning_rate": 1.3756659970303085e-05, "loss": 0.9744, "step": 82990 }, { "epoch": 7.25, "learning_rate": 1.3752292776661718e-05, "loss": 0.9597, "step": 83000 }, { "epoch": 7.25, "learning_rate": 1.3747925583020351e-05, "loss": 0.8147, "step": 83010 }, { "epoch": 7.25, "learning_rate": 1.3743558389378986e-05, "loss": 0.8394, "step": 83020 }, { "epoch": 7.25, "learning_rate": 1.373919119573762e-05, "loss": 0.8403, "step": 83030 }, { "epoch": 7.25, "learning_rate": 1.3734824002096252e-05, "loss": 0.8928, "step": 83040 }, { "epoch": 7.25, "learning_rate": 1.3730456808454886e-05, "loss": 0.8419, "step": 83050 }, { "epoch": 7.25, "learning_rate": 1.3726089614813522e-05, "loss": 0.8295, "step": 83060 }, { "epoch": 7.26, "learning_rate": 1.3721722421172155e-05, "loss": 1.0059, "step": 83070 }, { "epoch": 7.26, "learning_rate": 1.371735522753079e-05, "loss": 0.8211, "step": 83080 }, { "epoch": 7.26, "learning_rate": 1.3712988033889423e-05, "loss": 0.8963, "step": 83090 }, { "epoch": 7.26, "learning_rate": 1.3708620840248057e-05, "loss": 0.8248, "step": 83100 }, { "epoch": 7.26, "learning_rate": 1.370425364660669e-05, "loss": 0.9074, "step": 83110 }, { "epoch": 7.26, "learning_rate": 1.3699886452965325e-05, "loss": 0.8013, "step": 83120 }, { "epoch": 7.26, "learning_rate": 1.3695519259323958e-05, "loss": 0.9037, "step": 83130 }, { "epoch": 7.26, "learning_rate": 1.3691152065682595e-05, "loss": 0.8892, "step": 83140 }, { "epoch": 7.26, "learning_rate": 1.3686784872041228e-05, "loss": 0.8468, "step": 83150 }, { "epoch": 7.26, "learning_rate": 1.3682417678399861e-05, "loss": 0.9716, "step": 83160 }, { "epoch": 7.26, "learning_rate": 1.3678050484758494e-05, "loss": 0.9382, "step": 83170 }, { "epoch": 7.27, "learning_rate": 1.3673683291117129e-05, "loss": 0.8085, "step": 83180 }, { "epoch": 7.27, "learning_rate": 1.3669316097475762e-05, "loss": 0.9192, "step": 83190 }, { "epoch": 7.27, "learning_rate": 1.3664948903834396e-05, "loss": 0.9696, "step": 83200 }, { "epoch": 7.27, "learning_rate": 1.3660581710193029e-05, "loss": 0.8889, "step": 83210 }, { "epoch": 7.27, "learning_rate": 1.3656214516551665e-05, "loss": 0.844, "step": 83220 }, { "epoch": 7.27, "learning_rate": 1.36518473229103e-05, "loss": 0.8699, "step": 83230 }, { "epoch": 7.27, "learning_rate": 1.3647480129268933e-05, "loss": 0.8169, "step": 83240 }, { "epoch": 7.27, "learning_rate": 1.3643112935627567e-05, "loss": 0.7995, "step": 83250 }, { "epoch": 7.27, "learning_rate": 1.36387457419862e-05, "loss": 0.8296, "step": 83260 }, { "epoch": 7.27, "learning_rate": 1.3634378548344835e-05, "loss": 1.0112, "step": 83270 }, { "epoch": 7.27, "learning_rate": 1.3630011354703468e-05, "loss": 0.8423, "step": 83280 }, { "epoch": 7.27, "learning_rate": 1.3625644161062101e-05, "loss": 0.8943, "step": 83290 }, { "epoch": 7.28, "learning_rate": 1.3621276967420738e-05, "loss": 0.7766, "step": 83300 }, { "epoch": 7.28, "learning_rate": 1.3616909773779371e-05, "loss": 0.8748, "step": 83310 }, { "epoch": 7.28, "learning_rate": 1.3612542580138004e-05, "loss": 0.8061, "step": 83320 }, { "epoch": 7.28, "learning_rate": 1.3608175386496639e-05, "loss": 0.8446, "step": 83330 }, { "epoch": 7.28, "learning_rate": 1.3603808192855272e-05, "loss": 0.8073, "step": 83340 }, { "epoch": 7.28, "learning_rate": 1.3599440999213905e-05, "loss": 0.7567, "step": 83350 }, { "epoch": 7.28, "learning_rate": 1.3595073805572539e-05, "loss": 0.8866, "step": 83360 }, { "epoch": 7.28, "learning_rate": 1.3590706611931174e-05, "loss": 0.9681, "step": 83370 }, { "epoch": 7.28, "learning_rate": 1.3586339418289807e-05, "loss": 0.8198, "step": 83380 }, { "epoch": 7.28, "learning_rate": 1.3581972224648443e-05, "loss": 0.9483, "step": 83390 }, { "epoch": 7.28, "learning_rate": 1.3577605031007077e-05, "loss": 0.8323, "step": 83400 }, { "epoch": 7.29, "learning_rate": 1.357323783736571e-05, "loss": 0.9103, "step": 83410 }, { "epoch": 7.29, "learning_rate": 1.3568870643724343e-05, "loss": 0.772, "step": 83420 }, { "epoch": 7.29, "learning_rate": 1.3564503450082978e-05, "loss": 0.8827, "step": 83430 }, { "epoch": 7.29, "learning_rate": 1.3560136256441611e-05, "loss": 0.8649, "step": 83440 }, { "epoch": 7.29, "learning_rate": 1.3555769062800244e-05, "loss": 0.8967, "step": 83450 }, { "epoch": 7.29, "learning_rate": 1.3551401869158877e-05, "loss": 0.7789, "step": 83460 }, { "epoch": 7.29, "learning_rate": 1.3547034675517514e-05, "loss": 1.0304, "step": 83470 }, { "epoch": 7.29, "learning_rate": 1.3542667481876147e-05, "loss": 1.0306, "step": 83480 }, { "epoch": 7.29, "learning_rate": 1.3538300288234782e-05, "loss": 0.846, "step": 83490 }, { "epoch": 7.29, "learning_rate": 1.3533933094593415e-05, "loss": 0.8677, "step": 83500 }, { "epoch": 7.29, "learning_rate": 1.3529565900952049e-05, "loss": 0.7955, "step": 83510 }, { "epoch": 7.29, "learning_rate": 1.3525198707310682e-05, "loss": 0.7908, "step": 83520 }, { "epoch": 7.3, "learning_rate": 1.3520831513669317e-05, "loss": 0.8228, "step": 83530 }, { "epoch": 7.3, "learning_rate": 1.351646432002795e-05, "loss": 0.7341, "step": 83540 }, { "epoch": 7.3, "learning_rate": 1.3512097126386586e-05, "loss": 1.0321, "step": 83550 }, { "epoch": 7.3, "learning_rate": 1.350772993274522e-05, "loss": 0.8788, "step": 83560 }, { "epoch": 7.3, "learning_rate": 1.3503362739103853e-05, "loss": 0.8228, "step": 83570 }, { "epoch": 7.3, "learning_rate": 1.3498995545462486e-05, "loss": 0.9323, "step": 83580 }, { "epoch": 7.3, "learning_rate": 1.3494628351821121e-05, "loss": 0.884, "step": 83590 }, { "epoch": 7.3, "learning_rate": 1.3490261158179754e-05, "loss": 0.9505, "step": 83600 }, { "epoch": 7.3, "learning_rate": 1.3485893964538387e-05, "loss": 0.9052, "step": 83610 }, { "epoch": 7.3, "learning_rate": 1.348152677089702e-05, "loss": 0.877, "step": 83620 }, { "epoch": 7.3, "learning_rate": 1.3477159577255657e-05, "loss": 1.0005, "step": 83630 }, { "epoch": 7.31, "learning_rate": 1.347279238361429e-05, "loss": 0.9828, "step": 83640 }, { "epoch": 7.31, "learning_rate": 1.3468425189972925e-05, "loss": 0.8497, "step": 83650 }, { "epoch": 7.31, "learning_rate": 1.3464057996331558e-05, "loss": 0.8867, "step": 83660 }, { "epoch": 7.31, "learning_rate": 1.3459690802690192e-05, "loss": 0.8744, "step": 83670 }, { "epoch": 7.31, "learning_rate": 1.3455323609048825e-05, "loss": 0.8104, "step": 83680 }, { "epoch": 7.31, "learning_rate": 1.345095641540746e-05, "loss": 0.8536, "step": 83690 }, { "epoch": 7.31, "learning_rate": 1.3446589221766093e-05, "loss": 0.9765, "step": 83700 }, { "epoch": 7.31, "learning_rate": 1.3442222028124726e-05, "loss": 0.9018, "step": 83710 }, { "epoch": 7.31, "learning_rate": 1.3437854834483363e-05, "loss": 0.8558, "step": 83720 }, { "epoch": 7.31, "learning_rate": 1.3433487640841996e-05, "loss": 0.9251, "step": 83730 }, { "epoch": 7.31, "learning_rate": 1.3429120447200629e-05, "loss": 0.8895, "step": 83740 }, { "epoch": 7.32, "learning_rate": 1.3424753253559264e-05, "loss": 0.7772, "step": 83750 }, { "epoch": 7.32, "learning_rate": 1.3420386059917897e-05, "loss": 0.9389, "step": 83760 }, { "epoch": 7.32, "learning_rate": 1.341601886627653e-05, "loss": 0.7228, "step": 83770 }, { "epoch": 7.32, "learning_rate": 1.3411651672635164e-05, "loss": 0.9327, "step": 83780 }, { "epoch": 7.32, "learning_rate": 1.3407284478993799e-05, "loss": 0.799, "step": 83790 }, { "epoch": 7.32, "learning_rate": 1.3402917285352435e-05, "loss": 0.9197, "step": 83800 }, { "epoch": 7.32, "learning_rate": 1.3398550091711068e-05, "loss": 0.8124, "step": 83810 }, { "epoch": 7.32, "learning_rate": 1.3394182898069702e-05, "loss": 0.8149, "step": 83820 }, { "epoch": 7.32, "learning_rate": 1.3389815704428335e-05, "loss": 0.9549, "step": 83830 }, { "epoch": 7.32, "learning_rate": 1.3385448510786968e-05, "loss": 0.8571, "step": 83840 }, { "epoch": 7.32, "learning_rate": 1.3381081317145603e-05, "loss": 0.7843, "step": 83850 }, { "epoch": 7.32, "learning_rate": 1.3376714123504236e-05, "loss": 0.7509, "step": 83860 }, { "epoch": 7.33, "learning_rate": 1.337234692986287e-05, "loss": 0.6993, "step": 83870 }, { "epoch": 7.33, "learning_rate": 1.3367979736221506e-05, "loss": 0.8838, "step": 83880 }, { "epoch": 7.33, "learning_rate": 1.3363612542580139e-05, "loss": 0.8612, "step": 83890 }, { "epoch": 7.33, "learning_rate": 1.3359245348938774e-05, "loss": 0.8956, "step": 83900 }, { "epoch": 7.33, "learning_rate": 1.3354878155297407e-05, "loss": 0.8213, "step": 83910 }, { "epoch": 7.33, "learning_rate": 1.335051096165604e-05, "loss": 0.9393, "step": 83920 }, { "epoch": 7.33, "learning_rate": 1.3346143768014674e-05, "loss": 0.8312, "step": 83930 }, { "epoch": 7.33, "learning_rate": 1.3341776574373307e-05, "loss": 0.9178, "step": 83940 }, { "epoch": 7.33, "learning_rate": 1.3337409380731942e-05, "loss": 0.9146, "step": 83950 }, { "epoch": 7.33, "learning_rate": 1.3333042187090578e-05, "loss": 0.8694, "step": 83960 }, { "epoch": 7.33, "learning_rate": 1.3328674993449211e-05, "loss": 0.9182, "step": 83970 }, { "epoch": 7.34, "learning_rate": 1.3324307799807845e-05, "loss": 0.8414, "step": 83980 }, { "epoch": 7.34, "learning_rate": 1.3319940606166478e-05, "loss": 0.7991, "step": 83990 }, { "epoch": 7.34, "learning_rate": 1.3315573412525113e-05, "loss": 0.9483, "step": 84000 }, { "epoch": 7.34, "learning_rate": 1.3311206218883746e-05, "loss": 0.9226, "step": 84010 }, { "epoch": 7.34, "learning_rate": 1.3306839025242379e-05, "loss": 0.8577, "step": 84020 }, { "epoch": 7.34, "learning_rate": 1.3302471831601012e-05, "loss": 0.8646, "step": 84030 }, { "epoch": 7.34, "learning_rate": 1.3298104637959649e-05, "loss": 0.8682, "step": 84040 }, { "epoch": 7.34, "learning_rate": 1.3293737444318282e-05, "loss": 0.8945, "step": 84050 }, { "epoch": 7.34, "learning_rate": 1.3289370250676917e-05, "loss": 0.8166, "step": 84060 }, { "epoch": 7.34, "learning_rate": 1.328500305703555e-05, "loss": 0.9614, "step": 84070 }, { "epoch": 7.34, "learning_rate": 1.3280635863394183e-05, "loss": 0.8326, "step": 84080 }, { "epoch": 7.34, "learning_rate": 1.3276268669752817e-05, "loss": 0.8434, "step": 84090 }, { "epoch": 7.35, "learning_rate": 1.3271901476111452e-05, "loss": 0.8674, "step": 84100 }, { "epoch": 7.35, "learning_rate": 1.3267534282470085e-05, "loss": 0.8426, "step": 84110 }, { "epoch": 7.35, "learning_rate": 1.3263167088828718e-05, "loss": 0.9191, "step": 84120 }, { "epoch": 7.35, "learning_rate": 1.3258799895187355e-05, "loss": 0.8296, "step": 84130 }, { "epoch": 7.35, "learning_rate": 1.3254432701545988e-05, "loss": 0.8085, "step": 84140 }, { "epoch": 7.35, "learning_rate": 1.3250065507904621e-05, "loss": 0.9697, "step": 84150 }, { "epoch": 7.35, "learning_rate": 1.3245698314263256e-05, "loss": 0.8397, "step": 84160 }, { "epoch": 7.35, "learning_rate": 1.3241331120621889e-05, "loss": 0.942, "step": 84170 }, { "epoch": 7.35, "learning_rate": 1.3236963926980522e-05, "loss": 0.8447, "step": 84180 }, { "epoch": 7.35, "learning_rate": 1.3232596733339155e-05, "loss": 0.9756, "step": 84190 }, { "epoch": 7.35, "learning_rate": 1.322822953969779e-05, "loss": 0.9965, "step": 84200 }, { "epoch": 7.36, "learning_rate": 1.3223862346056425e-05, "loss": 0.8489, "step": 84210 }, { "epoch": 7.36, "learning_rate": 1.321949515241506e-05, "loss": 0.8163, "step": 84220 }, { "epoch": 7.36, "learning_rate": 1.3215127958773693e-05, "loss": 0.9061, "step": 84230 }, { "epoch": 7.36, "learning_rate": 1.3210760765132327e-05, "loss": 0.8393, "step": 84240 }, { "epoch": 7.36, "learning_rate": 1.320639357149096e-05, "loss": 0.7685, "step": 84250 }, { "epoch": 7.36, "learning_rate": 1.3202026377849595e-05, "loss": 0.9348, "step": 84260 }, { "epoch": 7.36, "learning_rate": 1.3197659184208228e-05, "loss": 0.811, "step": 84270 }, { "epoch": 7.36, "learning_rate": 1.3193291990566861e-05, "loss": 0.8782, "step": 84280 }, { "epoch": 7.36, "learning_rate": 1.3188924796925498e-05, "loss": 0.9458, "step": 84290 }, { "epoch": 7.36, "learning_rate": 1.318455760328413e-05, "loss": 0.934, "step": 84300 }, { "epoch": 7.36, "learning_rate": 1.3180190409642764e-05, "loss": 0.8008, "step": 84310 }, { "epoch": 7.36, "learning_rate": 1.3175823216001399e-05, "loss": 0.9315, "step": 84320 }, { "epoch": 7.37, "learning_rate": 1.3171456022360032e-05, "loss": 0.9227, "step": 84330 }, { "epoch": 7.37, "learning_rate": 1.3167088828718665e-05, "loss": 0.9364, "step": 84340 }, { "epoch": 7.37, "learning_rate": 1.3162721635077299e-05, "loss": 0.9844, "step": 84350 }, { "epoch": 7.37, "learning_rate": 1.3158354441435933e-05, "loss": 1.0014, "step": 84360 }, { "epoch": 7.37, "learning_rate": 1.3153987247794568e-05, "loss": 0.7796, "step": 84370 }, { "epoch": 7.37, "learning_rate": 1.3149620054153203e-05, "loss": 0.9717, "step": 84380 }, { "epoch": 7.37, "learning_rate": 1.3145252860511836e-05, "loss": 0.8435, "step": 84390 }, { "epoch": 7.37, "learning_rate": 1.314088566687047e-05, "loss": 0.8648, "step": 84400 }, { "epoch": 7.37, "learning_rate": 1.3136518473229103e-05, "loss": 0.8663, "step": 84410 }, { "epoch": 7.37, "learning_rate": 1.3132151279587738e-05, "loss": 0.7854, "step": 84420 }, { "epoch": 7.37, "learning_rate": 1.3127784085946371e-05, "loss": 0.9065, "step": 84430 }, { "epoch": 7.38, "learning_rate": 1.3123416892305004e-05, "loss": 0.7925, "step": 84440 }, { "epoch": 7.38, "learning_rate": 1.311904969866364e-05, "loss": 0.7917, "step": 84450 }, { "epoch": 7.38, "learning_rate": 1.3114682505022274e-05, "loss": 1.0379, "step": 84460 }, { "epoch": 7.38, "learning_rate": 1.3110315311380907e-05, "loss": 0.8149, "step": 84470 }, { "epoch": 7.38, "learning_rate": 1.3105948117739542e-05, "loss": 0.7406, "step": 84480 }, { "epoch": 7.38, "learning_rate": 1.3101580924098175e-05, "loss": 0.8669, "step": 84490 }, { "epoch": 7.38, "learning_rate": 1.3097213730456808e-05, "loss": 0.8683, "step": 84500 }, { "epoch": 7.38, "learning_rate": 1.3092846536815442e-05, "loss": 0.8732, "step": 84510 }, { "epoch": 7.38, "learning_rate": 1.3088479343174077e-05, "loss": 0.8593, "step": 84520 }, { "epoch": 7.38, "learning_rate": 1.308411214953271e-05, "loss": 0.9928, "step": 84530 }, { "epoch": 7.38, "learning_rate": 1.3079744955891346e-05, "loss": 0.8325, "step": 84540 }, { "epoch": 7.38, "learning_rate": 1.307537776224998e-05, "loss": 0.9837, "step": 84550 }, { "epoch": 7.39, "learning_rate": 1.3071010568608613e-05, "loss": 0.9535, "step": 84560 }, { "epoch": 7.39, "learning_rate": 1.3066643374967246e-05, "loss": 0.7813, "step": 84570 }, { "epoch": 7.39, "learning_rate": 1.3062276181325881e-05, "loss": 1.0039, "step": 84580 }, { "epoch": 7.39, "learning_rate": 1.3057908987684514e-05, "loss": 0.9329, "step": 84590 }, { "epoch": 7.39, "learning_rate": 1.3053541794043147e-05, "loss": 0.8568, "step": 84600 }, { "epoch": 7.39, "learning_rate": 1.304917460040178e-05, "loss": 0.9625, "step": 84610 }, { "epoch": 7.39, "learning_rate": 1.3044807406760417e-05, "loss": 0.8158, "step": 84620 }, { "epoch": 7.39, "learning_rate": 1.3040440213119052e-05, "loss": 0.8811, "step": 84630 }, { "epoch": 7.39, "learning_rate": 1.3036073019477685e-05, "loss": 0.7685, "step": 84640 }, { "epoch": 7.39, "learning_rate": 1.3031705825836318e-05, "loss": 0.7841, "step": 84650 }, { "epoch": 7.39, "learning_rate": 1.3027338632194952e-05, "loss": 0.9407, "step": 84660 }, { "epoch": 7.4, "learning_rate": 1.3022971438553585e-05, "loss": 0.8465, "step": 84670 }, { "epoch": 7.4, "learning_rate": 1.301860424491222e-05, "loss": 0.8975, "step": 84680 }, { "epoch": 7.4, "learning_rate": 1.3014237051270853e-05, "loss": 0.8863, "step": 84690 }, { "epoch": 7.4, "learning_rate": 1.300986985762949e-05, "loss": 0.8926, "step": 84700 }, { "epoch": 7.4, "learning_rate": 1.3005502663988123e-05, "loss": 0.7436, "step": 84710 }, { "epoch": 7.4, "learning_rate": 1.3001135470346756e-05, "loss": 0.9339, "step": 84720 }, { "epoch": 7.4, "learning_rate": 1.299676827670539e-05, "loss": 0.8104, "step": 84730 }, { "epoch": 7.4, "learning_rate": 1.2992401083064024e-05, "loss": 0.802, "step": 84740 }, { "epoch": 7.4, "learning_rate": 1.2988033889422657e-05, "loss": 0.8462, "step": 84750 }, { "epoch": 7.4, "learning_rate": 1.298366669578129e-05, "loss": 0.8908, "step": 84760 }, { "epoch": 7.4, "learning_rate": 1.2979299502139924e-05, "loss": 0.8209, "step": 84770 }, { "epoch": 7.41, "learning_rate": 1.297493230849856e-05, "loss": 0.9063, "step": 84780 }, { "epoch": 7.41, "learning_rate": 1.2970565114857195e-05, "loss": 0.9139, "step": 84790 }, { "epoch": 7.41, "learning_rate": 1.2966197921215828e-05, "loss": 0.9469, "step": 84800 }, { "epoch": 7.41, "learning_rate": 1.2961830727574461e-05, "loss": 0.8, "step": 84810 }, { "epoch": 7.41, "learning_rate": 1.2957463533933095e-05, "loss": 0.8711, "step": 84820 }, { "epoch": 7.41, "learning_rate": 1.295309634029173e-05, "loss": 0.9127, "step": 84830 }, { "epoch": 7.41, "learning_rate": 1.2948729146650363e-05, "loss": 0.8833, "step": 84840 }, { "epoch": 7.41, "learning_rate": 1.2944361953008996e-05, "loss": 0.819, "step": 84850 }, { "epoch": 7.41, "learning_rate": 1.293999475936763e-05, "loss": 0.8635, "step": 84860 }, { "epoch": 7.41, "learning_rate": 1.2935627565726266e-05, "loss": 0.8277, "step": 84870 }, { "epoch": 7.41, "learning_rate": 1.2931260372084899e-05, "loss": 0.9099, "step": 84880 }, { "epoch": 7.41, "learning_rate": 1.2926893178443534e-05, "loss": 0.9312, "step": 84890 }, { "epoch": 7.42, "learning_rate": 1.2922525984802167e-05, "loss": 0.9185, "step": 84900 }, { "epoch": 7.42, "learning_rate": 1.29181587911608e-05, "loss": 0.9424, "step": 84910 }, { "epoch": 7.42, "learning_rate": 1.2913791597519433e-05, "loss": 0.9092, "step": 84920 }, { "epoch": 7.42, "learning_rate": 1.2909424403878068e-05, "loss": 0.7041, "step": 84930 }, { "epoch": 7.42, "learning_rate": 1.2905057210236702e-05, "loss": 0.7648, "step": 84940 }, { "epoch": 7.42, "learning_rate": 1.2900690016595338e-05, "loss": 0.9765, "step": 84950 }, { "epoch": 7.42, "learning_rate": 1.2896322822953971e-05, "loss": 0.9853, "step": 84960 }, { "epoch": 7.42, "learning_rate": 1.2891955629312605e-05, "loss": 0.7562, "step": 84970 }, { "epoch": 7.42, "learning_rate": 1.2887588435671238e-05, "loss": 0.8101, "step": 84980 }, { "epoch": 7.42, "learning_rate": 1.2883221242029873e-05, "loss": 0.8804, "step": 84990 }, { "epoch": 7.42, "learning_rate": 1.2878854048388506e-05, "loss": 0.8869, "step": 85000 }, { "epoch": 7.42, "eval_accuracy": 0.5838655792473224, "eval_loss": 0.8780456185340881, "eval_runtime": 84.1209, "eval_samples_per_second": 120.981, "eval_steps_per_second": 15.133, "step": 85000 }, { "epoch": 7.43, "learning_rate": 1.2874486854747139e-05, "loss": 0.7997, "step": 85010 }, { "epoch": 7.43, "learning_rate": 1.2870119661105772e-05, "loss": 0.8857, "step": 85020 }, { "epoch": 7.43, "learning_rate": 1.2865752467464409e-05, "loss": 0.8391, "step": 85030 }, { "epoch": 7.43, "learning_rate": 1.2861385273823042e-05, "loss": 0.7728, "step": 85040 }, { "epoch": 7.43, "learning_rate": 1.2857018080181677e-05, "loss": 0.7543, "step": 85050 }, { "epoch": 7.43, "learning_rate": 1.285265088654031e-05, "loss": 0.9323, "step": 85060 }, { "epoch": 7.43, "learning_rate": 1.2848283692898943e-05, "loss": 0.8689, "step": 85070 }, { "epoch": 7.43, "learning_rate": 1.2843916499257577e-05, "loss": 0.7749, "step": 85080 }, { "epoch": 7.43, "learning_rate": 1.2839549305616211e-05, "loss": 0.8122, "step": 85090 }, { "epoch": 7.43, "learning_rate": 1.2835182111974845e-05, "loss": 0.8936, "step": 85100 }, { "epoch": 7.43, "learning_rate": 1.2830814918333481e-05, "loss": 0.877, "step": 85110 }, { "epoch": 7.43, "learning_rate": 1.2826447724692114e-05, "loss": 0.7908, "step": 85120 }, { "epoch": 7.44, "learning_rate": 1.2822080531050748e-05, "loss": 1.0436, "step": 85130 }, { "epoch": 7.44, "learning_rate": 1.2817713337409381e-05, "loss": 0.9125, "step": 85140 }, { "epoch": 7.44, "learning_rate": 1.2813346143768016e-05, "loss": 0.9135, "step": 85150 }, { "epoch": 7.44, "learning_rate": 1.2808978950126649e-05, "loss": 0.866, "step": 85160 }, { "epoch": 7.44, "learning_rate": 1.2804611756485282e-05, "loss": 0.9233, "step": 85170 }, { "epoch": 7.44, "learning_rate": 1.2800244562843915e-05, "loss": 0.987, "step": 85180 }, { "epoch": 7.44, "learning_rate": 1.2795877369202552e-05, "loss": 0.93, "step": 85190 }, { "epoch": 7.44, "learning_rate": 1.2791510175561187e-05, "loss": 0.9101, "step": 85200 }, { "epoch": 7.44, "learning_rate": 1.278714298191982e-05, "loss": 0.9219, "step": 85210 }, { "epoch": 7.44, "learning_rate": 1.2782775788278453e-05, "loss": 0.8543, "step": 85220 }, { "epoch": 7.44, "learning_rate": 1.2778408594637086e-05, "loss": 0.9258, "step": 85230 }, { "epoch": 7.45, "learning_rate": 1.277404140099572e-05, "loss": 0.7921, "step": 85240 }, { "epoch": 7.45, "learning_rate": 1.2769674207354355e-05, "loss": 0.9301, "step": 85250 }, { "epoch": 7.45, "learning_rate": 1.2765307013712988e-05, "loss": 0.9654, "step": 85260 }, { "epoch": 7.45, "learning_rate": 1.2760939820071621e-05, "loss": 0.7848, "step": 85270 }, { "epoch": 7.45, "learning_rate": 1.2756572626430258e-05, "loss": 0.8562, "step": 85280 }, { "epoch": 7.45, "learning_rate": 1.275220543278889e-05, "loss": 0.9405, "step": 85290 }, { "epoch": 7.45, "learning_rate": 1.2747838239147526e-05, "loss": 0.8285, "step": 85300 }, { "epoch": 7.45, "learning_rate": 1.2743471045506159e-05, "loss": 0.8609, "step": 85310 }, { "epoch": 7.45, "learning_rate": 1.2739103851864792e-05, "loss": 0.8853, "step": 85320 }, { "epoch": 7.45, "learning_rate": 1.2734736658223425e-05, "loss": 0.8816, "step": 85330 }, { "epoch": 7.45, "learning_rate": 1.2730369464582058e-05, "loss": 0.9403, "step": 85340 }, { "epoch": 7.45, "learning_rate": 1.2726002270940693e-05, "loss": 0.7989, "step": 85350 }, { "epoch": 7.46, "learning_rate": 1.272163507729933e-05, "loss": 0.9279, "step": 85360 }, { "epoch": 7.46, "learning_rate": 1.2717267883657963e-05, "loss": 0.8853, "step": 85370 }, { "epoch": 7.46, "learning_rate": 1.2712900690016596e-05, "loss": 0.843, "step": 85380 }, { "epoch": 7.46, "learning_rate": 1.270853349637523e-05, "loss": 0.8563, "step": 85390 }, { "epoch": 7.46, "learning_rate": 1.2704166302733864e-05, "loss": 0.8437, "step": 85400 }, { "epoch": 7.46, "learning_rate": 1.2699799109092498e-05, "loss": 1.0056, "step": 85410 }, { "epoch": 7.46, "learning_rate": 1.2695431915451131e-05, "loss": 0.874, "step": 85420 }, { "epoch": 7.46, "learning_rate": 1.2691064721809764e-05, "loss": 0.8793, "step": 85430 }, { "epoch": 7.46, "learning_rate": 1.26866975281684e-05, "loss": 0.8761, "step": 85440 }, { "epoch": 7.46, "learning_rate": 1.2682330334527034e-05, "loss": 0.7499, "step": 85450 }, { "epoch": 7.46, "learning_rate": 1.2677963140885669e-05, "loss": 0.8251, "step": 85460 }, { "epoch": 7.47, "learning_rate": 1.2673595947244302e-05, "loss": 0.8137, "step": 85470 }, { "epoch": 7.47, "learning_rate": 1.2669228753602935e-05, "loss": 0.887, "step": 85480 }, { "epoch": 7.47, "learning_rate": 1.2664861559961568e-05, "loss": 0.8372, "step": 85490 }, { "epoch": 7.47, "learning_rate": 1.2660494366320203e-05, "loss": 0.8892, "step": 85500 }, { "epoch": 7.47, "learning_rate": 1.2656127172678836e-05, "loss": 0.7281, "step": 85510 }, { "epoch": 7.47, "learning_rate": 1.2651759979037473e-05, "loss": 0.9407, "step": 85520 }, { "epoch": 7.47, "learning_rate": 1.2647392785396106e-05, "loss": 0.8771, "step": 85530 }, { "epoch": 7.47, "learning_rate": 1.264302559175474e-05, "loss": 0.7717, "step": 85540 }, { "epoch": 7.47, "learning_rate": 1.2638658398113373e-05, "loss": 1.0483, "step": 85550 }, { "epoch": 7.47, "learning_rate": 1.2634291204472008e-05, "loss": 0.9127, "step": 85560 }, { "epoch": 7.47, "learning_rate": 1.262992401083064e-05, "loss": 0.8462, "step": 85570 }, { "epoch": 7.47, "learning_rate": 1.2625556817189274e-05, "loss": 0.8906, "step": 85580 }, { "epoch": 7.48, "learning_rate": 1.2621189623547907e-05, "loss": 0.8834, "step": 85590 }, { "epoch": 7.48, "learning_rate": 1.2616822429906542e-05, "loss": 0.8826, "step": 85600 }, { "epoch": 7.48, "learning_rate": 1.2612455236265177e-05, "loss": 0.7559, "step": 85610 }, { "epoch": 7.48, "learning_rate": 1.2608088042623812e-05, "loss": 0.9316, "step": 85620 }, { "epoch": 7.48, "learning_rate": 1.2603720848982445e-05, "loss": 1.0081, "step": 85630 }, { "epoch": 7.48, "learning_rate": 1.2599353655341078e-05, "loss": 0.9953, "step": 85640 }, { "epoch": 7.48, "learning_rate": 1.2594986461699711e-05, "loss": 0.8498, "step": 85650 }, { "epoch": 7.48, "learning_rate": 1.2590619268058346e-05, "loss": 0.9392, "step": 85660 }, { "epoch": 7.48, "learning_rate": 1.258625207441698e-05, "loss": 0.7555, "step": 85670 }, { "epoch": 7.48, "learning_rate": 1.2581884880775613e-05, "loss": 0.9873, "step": 85680 }, { "epoch": 7.48, "learning_rate": 1.257751768713425e-05, "loss": 0.8718, "step": 85690 }, { "epoch": 7.49, "learning_rate": 1.2573150493492883e-05, "loss": 0.869, "step": 85700 }, { "epoch": 7.49, "learning_rate": 1.2568783299851516e-05, "loss": 0.9137, "step": 85710 }, { "epoch": 7.49, "learning_rate": 1.256441610621015e-05, "loss": 0.714, "step": 85720 }, { "epoch": 7.49, "learning_rate": 1.2560048912568784e-05, "loss": 0.8862, "step": 85730 }, { "epoch": 7.49, "learning_rate": 1.2555681718927417e-05, "loss": 0.8813, "step": 85740 }, { "epoch": 7.49, "learning_rate": 1.255131452528605e-05, "loss": 0.9342, "step": 85750 }, { "epoch": 7.49, "learning_rate": 1.2546947331644685e-05, "loss": 0.8091, "step": 85760 }, { "epoch": 7.49, "learning_rate": 1.254258013800332e-05, "loss": 0.807, "step": 85770 }, { "epoch": 7.49, "learning_rate": 1.2538212944361955e-05, "loss": 0.8043, "step": 85780 }, { "epoch": 7.49, "learning_rate": 1.2533845750720588e-05, "loss": 0.7075, "step": 85790 }, { "epoch": 7.49, "learning_rate": 1.2529478557079221e-05, "loss": 0.9496, "step": 85800 }, { "epoch": 7.49, "learning_rate": 1.2525111363437855e-05, "loss": 0.8888, "step": 85810 }, { "epoch": 7.5, "learning_rate": 1.252074416979649e-05, "loss": 0.9304, "step": 85820 }, { "epoch": 7.5, "learning_rate": 1.2516376976155123e-05, "loss": 0.8469, "step": 85830 }, { "epoch": 7.5, "learning_rate": 1.2512009782513756e-05, "loss": 0.8267, "step": 85840 }, { "epoch": 7.5, "learning_rate": 1.2507642588872392e-05, "loss": 0.8737, "step": 85850 }, { "epoch": 7.5, "learning_rate": 1.2503275395231026e-05, "loss": 0.8223, "step": 85860 }, { "epoch": 7.5, "learning_rate": 1.2498908201589659e-05, "loss": 0.8327, "step": 85870 }, { "epoch": 7.5, "learning_rate": 1.2494541007948294e-05, "loss": 0.8842, "step": 85880 }, { "epoch": 7.5, "learning_rate": 1.2490173814306927e-05, "loss": 0.9059, "step": 85890 }, { "epoch": 7.5, "learning_rate": 1.248580662066556e-05, "loss": 0.8842, "step": 85900 }, { "epoch": 7.5, "learning_rate": 1.2481439427024195e-05, "loss": 0.9331, "step": 85910 }, { "epoch": 7.5, "learning_rate": 1.2477072233382828e-05, "loss": 0.9256, "step": 85920 }, { "epoch": 7.51, "learning_rate": 1.2472705039741463e-05, "loss": 0.89, "step": 85930 }, { "epoch": 7.51, "learning_rate": 1.2468337846100096e-05, "loss": 1.0439, "step": 85940 }, { "epoch": 7.51, "learning_rate": 1.2463970652458731e-05, "loss": 0.9227, "step": 85950 }, { "epoch": 7.51, "learning_rate": 1.2459603458817365e-05, "loss": 0.7692, "step": 85960 }, { "epoch": 7.51, "learning_rate": 1.2455236265175998e-05, "loss": 0.8094, "step": 85970 }, { "epoch": 7.51, "learning_rate": 1.2450869071534633e-05, "loss": 0.8933, "step": 85980 }, { "epoch": 7.51, "learning_rate": 1.2446501877893266e-05, "loss": 0.8787, "step": 85990 }, { "epoch": 7.51, "learning_rate": 1.24421346842519e-05, "loss": 0.8141, "step": 86000 }, { "epoch": 7.51, "learning_rate": 1.2437767490610534e-05, "loss": 0.8747, "step": 86010 }, { "epoch": 7.51, "learning_rate": 1.2433400296969167e-05, "loss": 0.9565, "step": 86020 }, { "epoch": 7.51, "learning_rate": 1.2429033103327802e-05, "loss": 0.8599, "step": 86030 }, { "epoch": 7.52, "learning_rate": 1.2424665909686437e-05, "loss": 0.9567, "step": 86040 }, { "epoch": 7.52, "learning_rate": 1.242029871604507e-05, "loss": 0.9461, "step": 86050 }, { "epoch": 7.52, "learning_rate": 1.2415931522403703e-05, "loss": 0.9326, "step": 86060 }, { "epoch": 7.52, "learning_rate": 1.2411564328762337e-05, "loss": 0.9702, "step": 86070 }, { "epoch": 7.52, "learning_rate": 1.2407197135120973e-05, "loss": 0.8178, "step": 86080 }, { "epoch": 7.52, "learning_rate": 1.2402829941479606e-05, "loss": 0.728, "step": 86090 }, { "epoch": 7.52, "learning_rate": 1.239846274783824e-05, "loss": 0.7857, "step": 86100 }, { "epoch": 7.52, "learning_rate": 1.2394095554196873e-05, "loss": 0.9562, "step": 86110 }, { "epoch": 7.52, "learning_rate": 1.2389728360555508e-05, "loss": 0.9606, "step": 86120 }, { "epoch": 7.52, "learning_rate": 1.2385361166914143e-05, "loss": 0.8752, "step": 86130 }, { "epoch": 7.52, "learning_rate": 1.2380993973272776e-05, "loss": 0.7295, "step": 86140 }, { "epoch": 7.52, "learning_rate": 1.2376626779631409e-05, "loss": 0.863, "step": 86150 }, { "epoch": 7.53, "learning_rate": 1.2372259585990044e-05, "loss": 0.8707, "step": 86160 }, { "epoch": 7.53, "learning_rate": 1.2367892392348677e-05, "loss": 0.9473, "step": 86170 }, { "epoch": 7.53, "learning_rate": 1.2363525198707312e-05, "loss": 0.9163, "step": 86180 }, { "epoch": 7.53, "learning_rate": 1.2359158005065945e-05, "loss": 0.8767, "step": 86190 }, { "epoch": 7.53, "learning_rate": 1.235479081142458e-05, "loss": 0.793, "step": 86200 }, { "epoch": 7.53, "learning_rate": 1.2350423617783213e-05, "loss": 0.9034, "step": 86210 }, { "epoch": 7.53, "learning_rate": 1.2346056424141846e-05, "loss": 0.9515, "step": 86220 }, { "epoch": 7.53, "learning_rate": 1.2341689230500481e-05, "loss": 0.9335, "step": 86230 }, { "epoch": 7.53, "learning_rate": 1.2337322036859116e-05, "loss": 0.9757, "step": 86240 }, { "epoch": 7.53, "learning_rate": 1.233295484321775e-05, "loss": 0.7873, "step": 86250 }, { "epoch": 7.53, "learning_rate": 1.2328587649576383e-05, "loss": 0.7889, "step": 86260 }, { "epoch": 7.54, "learning_rate": 1.2324220455935016e-05, "loss": 0.7849, "step": 86270 }, { "epoch": 7.54, "learning_rate": 1.231985326229365e-05, "loss": 0.9811, "step": 86280 }, { "epoch": 7.54, "learning_rate": 1.2315486068652286e-05, "loss": 0.8847, "step": 86290 }, { "epoch": 7.54, "learning_rate": 1.2311118875010919e-05, "loss": 0.7697, "step": 86300 }, { "epoch": 7.54, "learning_rate": 1.2306751681369552e-05, "loss": 0.7323, "step": 86310 }, { "epoch": 7.54, "learning_rate": 1.2302384487728187e-05, "loss": 0.9488, "step": 86320 }, { "epoch": 7.54, "learning_rate": 1.229801729408682e-05, "loss": 0.9726, "step": 86330 }, { "epoch": 7.54, "learning_rate": 1.2293650100445455e-05, "loss": 0.8052, "step": 86340 }, { "epoch": 7.54, "learning_rate": 1.2289282906804088e-05, "loss": 0.9114, "step": 86350 }, { "epoch": 7.54, "learning_rate": 1.2284915713162721e-05, "loss": 0.758, "step": 86360 }, { "epoch": 7.54, "learning_rate": 1.2280548519521356e-05, "loss": 0.9756, "step": 86370 }, { "epoch": 7.54, "learning_rate": 1.227618132587999e-05, "loss": 0.8971, "step": 86380 }, { "epoch": 7.55, "learning_rate": 1.2271814132238624e-05, "loss": 0.8598, "step": 86390 }, { "epoch": 7.55, "learning_rate": 1.2267446938597258e-05, "loss": 0.9384, "step": 86400 }, { "epoch": 7.55, "learning_rate": 1.2263079744955893e-05, "loss": 0.9603, "step": 86410 }, { "epoch": 7.55, "learning_rate": 1.2258712551314526e-05, "loss": 0.8108, "step": 86420 }, { "epoch": 7.55, "learning_rate": 1.2254345357673159e-05, "loss": 0.8235, "step": 86430 }, { "epoch": 7.55, "learning_rate": 1.2249978164031794e-05, "loss": 0.8099, "step": 86440 }, { "epoch": 7.55, "learning_rate": 1.2245610970390429e-05, "loss": 0.8152, "step": 86450 }, { "epoch": 7.55, "learning_rate": 1.2241243776749062e-05, "loss": 1.0073, "step": 86460 }, { "epoch": 7.55, "learning_rate": 1.2236876583107695e-05, "loss": 0.8773, "step": 86470 }, { "epoch": 7.55, "learning_rate": 1.2232509389466328e-05, "loss": 0.9146, "step": 86480 }, { "epoch": 7.55, "learning_rate": 1.2228142195824963e-05, "loss": 0.8924, "step": 86490 }, { "epoch": 7.56, "learning_rate": 1.2223775002183598e-05, "loss": 0.8139, "step": 86500 }, { "epoch": 7.56, "learning_rate": 1.2219407808542231e-05, "loss": 0.8215, "step": 86510 }, { "epoch": 7.56, "learning_rate": 1.2215040614900865e-05, "loss": 0.8436, "step": 86520 }, { "epoch": 7.56, "learning_rate": 1.22106734212595e-05, "loss": 0.7973, "step": 86530 }, { "epoch": 7.56, "learning_rate": 1.2206306227618133e-05, "loss": 0.8667, "step": 86540 }, { "epoch": 7.56, "learning_rate": 1.2201939033976768e-05, "loss": 0.8477, "step": 86550 }, { "epoch": 7.56, "learning_rate": 1.21975718403354e-05, "loss": 0.8394, "step": 86560 }, { "epoch": 7.56, "learning_rate": 1.2193204646694036e-05, "loss": 0.8391, "step": 86570 }, { "epoch": 7.56, "learning_rate": 1.2188837453052669e-05, "loss": 0.9423, "step": 86580 }, { "epoch": 7.56, "learning_rate": 1.2184470259411302e-05, "loss": 0.7752, "step": 86590 }, { "epoch": 7.56, "learning_rate": 1.2180103065769937e-05, "loss": 0.7239, "step": 86600 }, { "epoch": 7.56, "learning_rate": 1.2175735872128572e-05, "loss": 0.8269, "step": 86610 }, { "epoch": 7.57, "learning_rate": 1.2171368678487205e-05, "loss": 0.9965, "step": 86620 }, { "epoch": 7.57, "learning_rate": 1.2167001484845838e-05, "loss": 1.0053, "step": 86630 }, { "epoch": 7.57, "learning_rate": 1.2162634291204471e-05, "loss": 0.7207, "step": 86640 }, { "epoch": 7.57, "learning_rate": 1.2158267097563106e-05, "loss": 0.7977, "step": 86650 }, { "epoch": 7.57, "learning_rate": 1.2153899903921741e-05, "loss": 0.9193, "step": 86660 }, { "epoch": 7.57, "learning_rate": 1.2149532710280374e-05, "loss": 0.8703, "step": 86670 }, { "epoch": 7.57, "learning_rate": 1.2145165516639008e-05, "loss": 0.9867, "step": 86680 }, { "epoch": 7.57, "learning_rate": 1.2140798322997643e-05, "loss": 0.7752, "step": 86690 }, { "epoch": 7.57, "learning_rate": 1.2136431129356276e-05, "loss": 0.8926, "step": 86700 }, { "epoch": 7.57, "learning_rate": 1.213206393571491e-05, "loss": 0.8336, "step": 86710 }, { "epoch": 7.57, "learning_rate": 1.2127696742073544e-05, "loss": 0.8651, "step": 86720 }, { "epoch": 7.58, "learning_rate": 1.2123329548432177e-05, "loss": 0.8267, "step": 86730 }, { "epoch": 7.58, "learning_rate": 1.2118962354790812e-05, "loss": 0.9409, "step": 86740 }, { "epoch": 7.58, "learning_rate": 1.2114595161149445e-05, "loss": 0.8649, "step": 86750 }, { "epoch": 7.58, "learning_rate": 1.211022796750808e-05, "loss": 0.9339, "step": 86760 }, { "epoch": 7.58, "learning_rate": 1.2105860773866713e-05, "loss": 0.9228, "step": 86770 }, { "epoch": 7.58, "learning_rate": 1.2101493580225348e-05, "loss": 0.8274, "step": 86780 }, { "epoch": 7.58, "learning_rate": 1.2097126386583981e-05, "loss": 0.9631, "step": 86790 }, { "epoch": 7.58, "learning_rate": 1.2092759192942616e-05, "loss": 0.8806, "step": 86800 }, { "epoch": 7.58, "learning_rate": 1.208839199930125e-05, "loss": 0.9198, "step": 86810 }, { "epoch": 7.58, "learning_rate": 1.2084024805659884e-05, "loss": 0.8487, "step": 86820 }, { "epoch": 7.58, "learning_rate": 1.2079657612018518e-05, "loss": 0.9057, "step": 86830 }, { "epoch": 7.58, "learning_rate": 1.207529041837715e-05, "loss": 0.8333, "step": 86840 }, { "epoch": 7.59, "learning_rate": 1.2070923224735786e-05, "loss": 0.9448, "step": 86850 }, { "epoch": 7.59, "learning_rate": 1.206655603109442e-05, "loss": 0.8847, "step": 86860 }, { "epoch": 7.59, "learning_rate": 1.2062188837453054e-05, "loss": 0.8445, "step": 86870 }, { "epoch": 7.59, "learning_rate": 1.2057821643811687e-05, "loss": 0.9275, "step": 86880 }, { "epoch": 7.59, "learning_rate": 1.205345445017032e-05, "loss": 0.9075, "step": 86890 }, { "epoch": 7.59, "learning_rate": 1.2049087256528955e-05, "loss": 0.9359, "step": 86900 }, { "epoch": 7.59, "learning_rate": 1.204472006288759e-05, "loss": 0.7526, "step": 86910 }, { "epoch": 7.59, "learning_rate": 1.2040352869246223e-05, "loss": 0.9308, "step": 86920 }, { "epoch": 7.59, "learning_rate": 1.2035985675604856e-05, "loss": 0.9161, "step": 86930 }, { "epoch": 7.59, "learning_rate": 1.2031618481963491e-05, "loss": 0.8815, "step": 86940 }, { "epoch": 7.59, "learning_rate": 1.2027251288322124e-05, "loss": 0.9198, "step": 86950 }, { "epoch": 7.6, "learning_rate": 1.202288409468076e-05, "loss": 0.9744, "step": 86960 }, { "epoch": 7.6, "learning_rate": 1.2018516901039393e-05, "loss": 0.9414, "step": 86970 }, { "epoch": 7.6, "learning_rate": 1.2014149707398027e-05, "loss": 0.825, "step": 86980 }, { "epoch": 7.6, "learning_rate": 1.200978251375666e-05, "loss": 0.8658, "step": 86990 }, { "epoch": 7.6, "learning_rate": 1.2005415320115294e-05, "loss": 0.793, "step": 87000 }, { "epoch": 7.6, "learning_rate": 1.2001048126473929e-05, "loss": 0.8725, "step": 87010 }, { "epoch": 7.6, "learning_rate": 1.1996680932832564e-05, "loss": 0.9457, "step": 87020 }, { "epoch": 7.6, "learning_rate": 1.1992313739191197e-05, "loss": 0.8539, "step": 87030 }, { "epoch": 7.6, "learning_rate": 1.198794654554983e-05, "loss": 0.9731, "step": 87040 }, { "epoch": 7.6, "learning_rate": 1.1983579351908463e-05, "loss": 0.8243, "step": 87050 }, { "epoch": 7.6, "learning_rate": 1.1979212158267098e-05, "loss": 0.7373, "step": 87060 }, { "epoch": 7.61, "learning_rate": 1.1974844964625733e-05, "loss": 0.9377, "step": 87070 }, { "epoch": 7.61, "learning_rate": 1.1970477770984366e-05, "loss": 0.8185, "step": 87080 }, { "epoch": 7.61, "learning_rate": 1.1966110577343e-05, "loss": 0.8386, "step": 87090 }, { "epoch": 7.61, "learning_rate": 1.1961743383701633e-05, "loss": 0.8788, "step": 87100 }, { "epoch": 7.61, "learning_rate": 1.1957376190060268e-05, "loss": 0.9667, "step": 87110 }, { "epoch": 7.61, "learning_rate": 1.1953008996418902e-05, "loss": 0.9161, "step": 87120 }, { "epoch": 7.61, "learning_rate": 1.1948641802777536e-05, "loss": 0.8146, "step": 87130 }, { "epoch": 7.61, "learning_rate": 1.1944274609136169e-05, "loss": 0.9237, "step": 87140 }, { "epoch": 7.61, "learning_rate": 1.1939907415494804e-05, "loss": 0.8104, "step": 87150 }, { "epoch": 7.61, "learning_rate": 1.1935540221853437e-05, "loss": 0.7976, "step": 87160 }, { "epoch": 7.61, "learning_rate": 1.1931173028212072e-05, "loss": 0.8839, "step": 87170 }, { "epoch": 7.61, "learning_rate": 1.1926805834570705e-05, "loss": 0.9369, "step": 87180 }, { "epoch": 7.62, "learning_rate": 1.192243864092934e-05, "loss": 0.8975, "step": 87190 }, { "epoch": 7.62, "learning_rate": 1.1918071447287973e-05, "loss": 0.8206, "step": 87200 }, { "epoch": 7.62, "learning_rate": 1.1913704253646606e-05, "loss": 0.8585, "step": 87210 }, { "epoch": 7.62, "learning_rate": 1.1909337060005241e-05, "loss": 0.9531, "step": 87220 }, { "epoch": 7.62, "learning_rate": 1.1904969866363876e-05, "loss": 0.8872, "step": 87230 }, { "epoch": 7.62, "learning_rate": 1.190060267272251e-05, "loss": 0.8317, "step": 87240 }, { "epoch": 7.62, "learning_rate": 1.1896235479081143e-05, "loss": 0.848, "step": 87250 }, { "epoch": 7.62, "learning_rate": 1.1891868285439776e-05, "loss": 1.0307, "step": 87260 }, { "epoch": 7.62, "learning_rate": 1.188750109179841e-05, "loss": 0.7683, "step": 87270 }, { "epoch": 7.62, "learning_rate": 1.1883133898157046e-05, "loss": 0.8872, "step": 87280 }, { "epoch": 7.62, "learning_rate": 1.1878766704515679e-05, "loss": 0.8244, "step": 87290 }, { "epoch": 7.63, "learning_rate": 1.1874399510874312e-05, "loss": 0.9062, "step": 87300 }, { "epoch": 7.63, "learning_rate": 1.1870032317232947e-05, "loss": 0.7437, "step": 87310 }, { "epoch": 7.63, "learning_rate": 1.186566512359158e-05, "loss": 0.922, "step": 87320 }, { "epoch": 7.63, "learning_rate": 1.1861297929950215e-05, "loss": 0.8428, "step": 87330 }, { "epoch": 7.63, "learning_rate": 1.1856930736308848e-05, "loss": 0.907, "step": 87340 }, { "epoch": 7.63, "learning_rate": 1.1852563542667483e-05, "loss": 0.8458, "step": 87350 }, { "epoch": 7.63, "learning_rate": 1.1848196349026116e-05, "loss": 0.953, "step": 87360 }, { "epoch": 7.63, "learning_rate": 1.184382915538475e-05, "loss": 0.8095, "step": 87370 }, { "epoch": 7.63, "learning_rate": 1.1839461961743384e-05, "loss": 0.8702, "step": 87380 }, { "epoch": 7.63, "learning_rate": 1.183509476810202e-05, "loss": 0.7163, "step": 87390 }, { "epoch": 7.63, "learning_rate": 1.1830727574460652e-05, "loss": 0.9221, "step": 87400 }, { "epoch": 7.63, "learning_rate": 1.1826360380819286e-05, "loss": 0.8422, "step": 87410 }, { "epoch": 7.64, "learning_rate": 1.1821993187177919e-05, "loss": 0.9211, "step": 87420 }, { "epoch": 7.64, "learning_rate": 1.1817625993536555e-05, "loss": 0.9316, "step": 87430 }, { "epoch": 7.64, "learning_rate": 1.1813258799895189e-05, "loss": 0.9003, "step": 87440 }, { "epoch": 7.64, "learning_rate": 1.1808891606253822e-05, "loss": 0.8298, "step": 87450 }, { "epoch": 7.64, "learning_rate": 1.1804524412612455e-05, "loss": 0.8361, "step": 87460 }, { "epoch": 7.64, "learning_rate": 1.1800157218971088e-05, "loss": 0.7814, "step": 87470 }, { "epoch": 7.64, "learning_rate": 1.1795790025329725e-05, "loss": 0.8096, "step": 87480 }, { "epoch": 7.64, "learning_rate": 1.1791422831688358e-05, "loss": 0.8267, "step": 87490 }, { "epoch": 7.64, "learning_rate": 1.1787055638046991e-05, "loss": 0.8159, "step": 87500 }, { "epoch": 7.64, "learning_rate": 1.1782688444405624e-05, "loss": 0.9514, "step": 87510 }, { "epoch": 7.64, "learning_rate": 1.177832125076426e-05, "loss": 0.7852, "step": 87520 }, { "epoch": 7.65, "learning_rate": 1.1773954057122894e-05, "loss": 0.8226, "step": 87530 }, { "epoch": 7.65, "learning_rate": 1.1769586863481527e-05, "loss": 0.959, "step": 87540 }, { "epoch": 7.65, "learning_rate": 1.176521966984016e-05, "loss": 0.8452, "step": 87550 }, { "epoch": 7.65, "learning_rate": 1.1760852476198796e-05, "loss": 0.9848, "step": 87560 }, { "epoch": 7.65, "learning_rate": 1.1756485282557429e-05, "loss": 0.7659, "step": 87570 }, { "epoch": 7.65, "learning_rate": 1.1752118088916064e-05, "loss": 0.9186, "step": 87580 }, { "epoch": 7.65, "learning_rate": 1.1747750895274697e-05, "loss": 0.8036, "step": 87590 }, { "epoch": 7.65, "learning_rate": 1.1743383701633332e-05, "loss": 0.9748, "step": 87600 }, { "epoch": 7.65, "learning_rate": 1.1739016507991965e-05, "loss": 0.9465, "step": 87610 }, { "epoch": 7.65, "learning_rate": 1.1734649314350598e-05, "loss": 0.9714, "step": 87620 }, { "epoch": 7.65, "learning_rate": 1.1730282120709233e-05, "loss": 0.864, "step": 87630 }, { "epoch": 7.65, "learning_rate": 1.1725914927067868e-05, "loss": 0.9124, "step": 87640 }, { "epoch": 7.66, "learning_rate": 1.1721547733426501e-05, "loss": 0.8297, "step": 87650 }, { "epoch": 7.66, "learning_rate": 1.1717180539785134e-05, "loss": 0.9108, "step": 87660 }, { "epoch": 7.66, "learning_rate": 1.1712813346143768e-05, "loss": 0.9888, "step": 87670 }, { "epoch": 7.66, "learning_rate": 1.1708446152502402e-05, "loss": 0.869, "step": 87680 }, { "epoch": 7.66, "learning_rate": 1.1704078958861037e-05, "loss": 0.8638, "step": 87690 }, { "epoch": 7.66, "learning_rate": 1.169971176521967e-05, "loss": 0.7889, "step": 87700 }, { "epoch": 7.66, "learning_rate": 1.1695344571578304e-05, "loss": 0.8252, "step": 87710 }, { "epoch": 7.66, "learning_rate": 1.1690977377936939e-05, "loss": 0.9702, "step": 87720 }, { "epoch": 7.66, "learning_rate": 1.1686610184295572e-05, "loss": 0.7646, "step": 87730 }, { "epoch": 7.66, "learning_rate": 1.1682242990654207e-05, "loss": 0.7999, "step": 87740 }, { "epoch": 7.66, "learning_rate": 1.167787579701284e-05, "loss": 0.9074, "step": 87750 }, { "epoch": 7.67, "learning_rate": 1.1673508603371475e-05, "loss": 0.8922, "step": 87760 }, { "epoch": 7.67, "learning_rate": 1.1669141409730108e-05, "loss": 0.758, "step": 87770 }, { "epoch": 7.67, "learning_rate": 1.1664774216088741e-05, "loss": 0.9147, "step": 87780 }, { "epoch": 7.67, "learning_rate": 1.1660407022447376e-05, "loss": 0.8686, "step": 87790 }, { "epoch": 7.67, "learning_rate": 1.1656039828806011e-05, "loss": 0.9669, "step": 87800 }, { "epoch": 7.67, "learning_rate": 1.1651672635164644e-05, "loss": 0.7523, "step": 87810 }, { "epoch": 7.67, "learning_rate": 1.1647305441523277e-05, "loss": 0.8965, "step": 87820 }, { "epoch": 7.67, "learning_rate": 1.164293824788191e-05, "loss": 0.7299, "step": 87830 }, { "epoch": 7.67, "learning_rate": 1.1638571054240546e-05, "loss": 1.0058, "step": 87840 }, { "epoch": 7.67, "learning_rate": 1.163420386059918e-05, "loss": 0.8353, "step": 87850 }, { "epoch": 7.67, "learning_rate": 1.1629836666957814e-05, "loss": 0.8289, "step": 87860 }, { "epoch": 7.67, "learning_rate": 1.1625469473316447e-05, "loss": 0.9672, "step": 87870 }, { "epoch": 7.68, "learning_rate": 1.162110227967508e-05, "loss": 0.8718, "step": 87880 }, { "epoch": 7.68, "learning_rate": 1.1616735086033715e-05, "loss": 0.944, "step": 87890 }, { "epoch": 7.68, "learning_rate": 1.161236789239235e-05, "loss": 0.9042, "step": 87900 }, { "epoch": 7.68, "learning_rate": 1.1608000698750983e-05, "loss": 0.8895, "step": 87910 }, { "epoch": 7.68, "learning_rate": 1.1603633505109616e-05, "loss": 0.999, "step": 87920 }, { "epoch": 7.68, "learning_rate": 1.1599266311468251e-05, "loss": 0.9204, "step": 87930 }, { "epoch": 7.68, "learning_rate": 1.1594899117826884e-05, "loss": 0.8419, "step": 87940 }, { "epoch": 7.68, "learning_rate": 1.159053192418552e-05, "loss": 0.8422, "step": 87950 }, { "epoch": 7.68, "learning_rate": 1.1586164730544152e-05, "loss": 1.004, "step": 87960 }, { "epoch": 7.68, "learning_rate": 1.1581797536902787e-05, "loss": 0.9681, "step": 87970 }, { "epoch": 7.68, "learning_rate": 1.157743034326142e-05, "loss": 0.8469, "step": 87980 }, { "epoch": 7.69, "learning_rate": 1.1573063149620054e-05, "loss": 0.8596, "step": 87990 }, { "epoch": 7.69, "learning_rate": 1.1568695955978689e-05, "loss": 0.9047, "step": 88000 }, { "epoch": 7.69, "learning_rate": 1.1564328762337324e-05, "loss": 1.0193, "step": 88010 }, { "epoch": 7.69, "learning_rate": 1.1559961568695957e-05, "loss": 0.9367, "step": 88020 }, { "epoch": 7.69, "learning_rate": 1.155559437505459e-05, "loss": 0.8306, "step": 88030 }, { "epoch": 7.69, "learning_rate": 1.1551227181413223e-05, "loss": 0.8465, "step": 88040 }, { "epoch": 7.69, "learning_rate": 1.1546859987771858e-05, "loss": 0.8599, "step": 88050 }, { "epoch": 7.69, "learning_rate": 1.1542492794130493e-05, "loss": 0.8051, "step": 88060 }, { "epoch": 7.69, "learning_rate": 1.1538125600489126e-05, "loss": 0.9569, "step": 88070 }, { "epoch": 7.69, "learning_rate": 1.153375840684776e-05, "loss": 0.8274, "step": 88080 }, { "epoch": 7.69, "learning_rate": 1.1529391213206394e-05, "loss": 0.8387, "step": 88090 }, { "epoch": 7.69, "learning_rate": 1.1525024019565027e-05, "loss": 0.8953, "step": 88100 }, { "epoch": 7.7, "learning_rate": 1.1520656825923662e-05, "loss": 0.9522, "step": 88110 }, { "epoch": 7.7, "learning_rate": 1.1516289632282296e-05, "loss": 0.9474, "step": 88120 }, { "epoch": 7.7, "learning_rate": 1.151192243864093e-05, "loss": 0.966, "step": 88130 }, { "epoch": 7.7, "learning_rate": 1.1507555244999564e-05, "loss": 0.9399, "step": 88140 }, { "epoch": 7.7, "learning_rate": 1.1503188051358197e-05, "loss": 0.7345, "step": 88150 }, { "epoch": 7.7, "learning_rate": 1.1498820857716832e-05, "loss": 0.8391, "step": 88160 }, { "epoch": 7.7, "learning_rate": 1.1494453664075467e-05, "loss": 0.9026, "step": 88170 }, { "epoch": 7.7, "learning_rate": 1.14900864704341e-05, "loss": 0.8699, "step": 88180 }, { "epoch": 7.7, "learning_rate": 1.1485719276792733e-05, "loss": 0.8168, "step": 88190 }, { "epoch": 7.7, "learning_rate": 1.1481352083151366e-05, "loss": 0.8666, "step": 88200 }, { "epoch": 7.7, "learning_rate": 1.1476984889510003e-05, "loss": 0.8739, "step": 88210 }, { "epoch": 7.71, "learning_rate": 1.1472617695868636e-05, "loss": 0.8381, "step": 88220 }, { "epoch": 7.71, "learning_rate": 1.146825050222727e-05, "loss": 0.853, "step": 88230 }, { "epoch": 7.71, "learning_rate": 1.1463883308585902e-05, "loss": 0.9581, "step": 88240 }, { "epoch": 7.71, "learning_rate": 1.1459516114944536e-05, "loss": 0.9574, "step": 88250 }, { "epoch": 7.71, "learning_rate": 1.1455148921303172e-05, "loss": 0.8966, "step": 88260 }, { "epoch": 7.71, "learning_rate": 1.1450781727661805e-05, "loss": 0.9555, "step": 88270 }, { "epoch": 7.71, "learning_rate": 1.1446414534020439e-05, "loss": 0.934, "step": 88280 }, { "epoch": 7.71, "learning_rate": 1.1442047340379072e-05, "loss": 0.8072, "step": 88290 }, { "epoch": 7.71, "learning_rate": 1.1437680146737707e-05, "loss": 0.9366, "step": 88300 }, { "epoch": 7.71, "learning_rate": 1.1433312953096342e-05, "loss": 0.7933, "step": 88310 }, { "epoch": 7.71, "learning_rate": 1.1428945759454975e-05, "loss": 0.9329, "step": 88320 }, { "epoch": 7.72, "learning_rate": 1.1424578565813608e-05, "loss": 0.844, "step": 88330 }, { "epoch": 7.72, "learning_rate": 1.1420211372172243e-05, "loss": 0.9451, "step": 88340 }, { "epoch": 7.72, "learning_rate": 1.1415844178530876e-05, "loss": 0.8596, "step": 88350 }, { "epoch": 7.72, "learning_rate": 1.1411476984889511e-05, "loss": 0.8853, "step": 88360 }, { "epoch": 7.72, "learning_rate": 1.1407109791248144e-05, "loss": 0.8282, "step": 88370 }, { "epoch": 7.72, "learning_rate": 1.140274259760678e-05, "loss": 0.9223, "step": 88380 }, { "epoch": 7.72, "learning_rate": 1.1398375403965412e-05, "loss": 0.8436, "step": 88390 }, { "epoch": 7.72, "learning_rate": 1.1394008210324046e-05, "loss": 0.7489, "step": 88400 }, { "epoch": 7.72, "learning_rate": 1.138964101668268e-05, "loss": 0.8838, "step": 88410 }, { "epoch": 7.72, "learning_rate": 1.1385273823041315e-05, "loss": 0.974, "step": 88420 }, { "epoch": 7.72, "learning_rate": 1.1380906629399949e-05, "loss": 0.9624, "step": 88430 }, { "epoch": 7.72, "learning_rate": 1.1376539435758582e-05, "loss": 0.7682, "step": 88440 }, { "epoch": 7.73, "learning_rate": 1.1372172242117215e-05, "loss": 0.8941, "step": 88450 }, { "epoch": 7.73, "learning_rate": 1.136780504847585e-05, "loss": 0.7783, "step": 88460 }, { "epoch": 7.73, "learning_rate": 1.1363437854834485e-05, "loss": 0.8051, "step": 88470 }, { "epoch": 7.73, "learning_rate": 1.1359070661193118e-05, "loss": 0.8783, "step": 88480 }, { "epoch": 7.73, "learning_rate": 1.1354703467551751e-05, "loss": 0.9082, "step": 88490 }, { "epoch": 7.73, "learning_rate": 1.1350336273910386e-05, "loss": 0.9201, "step": 88500 }, { "epoch": 7.73, "learning_rate": 1.134596908026902e-05, "loss": 0.9715, "step": 88510 }, { "epoch": 7.73, "learning_rate": 1.1341601886627654e-05, "loss": 0.9941, "step": 88520 }, { "epoch": 7.73, "learning_rate": 1.1337234692986287e-05, "loss": 0.839, "step": 88530 }, { "epoch": 7.73, "learning_rate": 1.1332867499344922e-05, "loss": 0.9193, "step": 88540 }, { "epoch": 7.73, "learning_rate": 1.1328500305703556e-05, "loss": 0.9654, "step": 88550 }, { "epoch": 7.74, "learning_rate": 1.1324133112062189e-05, "loss": 0.858, "step": 88560 }, { "epoch": 7.74, "learning_rate": 1.1319765918420824e-05, "loss": 0.8929, "step": 88570 }, { "epoch": 7.74, "learning_rate": 1.1315398724779458e-05, "loss": 0.8702, "step": 88580 }, { "epoch": 7.74, "learning_rate": 1.1311031531138092e-05, "loss": 0.8251, "step": 88590 }, { "epoch": 7.74, "learning_rate": 1.1306664337496725e-05, "loss": 0.8703, "step": 88600 }, { "epoch": 7.74, "learning_rate": 1.1302297143855358e-05, "loss": 0.9018, "step": 88610 }, { "epoch": 7.74, "learning_rate": 1.1297929950213993e-05, "loss": 0.9676, "step": 88620 }, { "epoch": 7.74, "learning_rate": 1.1293562756572628e-05, "loss": 0.9136, "step": 88630 }, { "epoch": 7.74, "learning_rate": 1.1289195562931261e-05, "loss": 0.9059, "step": 88640 }, { "epoch": 7.74, "learning_rate": 1.1284828369289894e-05, "loss": 0.9121, "step": 88650 }, { "epoch": 7.74, "learning_rate": 1.1280461175648528e-05, "loss": 0.9267, "step": 88660 }, { "epoch": 7.74, "learning_rate": 1.1276093982007162e-05, "loss": 0.7734, "step": 88670 }, { "epoch": 7.75, "learning_rate": 1.1271726788365797e-05, "loss": 0.9507, "step": 88680 }, { "epoch": 7.75, "learning_rate": 1.126735959472443e-05, "loss": 0.8901, "step": 88690 }, { "epoch": 7.75, "learning_rate": 1.1262992401083064e-05, "loss": 0.7314, "step": 88700 }, { "epoch": 7.75, "learning_rate": 1.1258625207441699e-05, "loss": 0.771, "step": 88710 }, { "epoch": 7.75, "learning_rate": 1.1254258013800332e-05, "loss": 0.9037, "step": 88720 }, { "epoch": 7.75, "learning_rate": 1.1249890820158967e-05, "loss": 0.8891, "step": 88730 }, { "epoch": 7.75, "learning_rate": 1.12455236265176e-05, "loss": 0.9368, "step": 88740 }, { "epoch": 7.75, "learning_rate": 1.1241156432876235e-05, "loss": 0.8911, "step": 88750 }, { "epoch": 7.75, "learning_rate": 1.1236789239234868e-05, "loss": 0.9065, "step": 88760 }, { "epoch": 7.75, "learning_rate": 1.1232422045593501e-05, "loss": 0.8405, "step": 88770 }, { "epoch": 7.75, "learning_rate": 1.1228054851952136e-05, "loss": 0.8945, "step": 88780 }, { "epoch": 7.76, "learning_rate": 1.1223687658310771e-05, "loss": 0.8847, "step": 88790 }, { "epoch": 7.76, "learning_rate": 1.1219320464669404e-05, "loss": 0.8439, "step": 88800 }, { "epoch": 7.76, "learning_rate": 1.1214953271028037e-05, "loss": 1.0127, "step": 88810 }, { "epoch": 7.76, "learning_rate": 1.121058607738667e-05, "loss": 0.8958, "step": 88820 }, { "epoch": 7.76, "learning_rate": 1.1206218883745307e-05, "loss": 0.8756, "step": 88830 }, { "epoch": 7.76, "learning_rate": 1.120185169010394e-05, "loss": 0.9373, "step": 88840 }, { "epoch": 7.76, "learning_rate": 1.1197484496462574e-05, "loss": 0.9448, "step": 88850 }, { "epoch": 7.76, "learning_rate": 1.1193117302821207e-05, "loss": 0.8244, "step": 88860 }, { "epoch": 7.76, "learning_rate": 1.1188750109179842e-05, "loss": 0.864, "step": 88870 }, { "epoch": 7.76, "learning_rate": 1.1184382915538477e-05, "loss": 0.8362, "step": 88880 }, { "epoch": 7.76, "learning_rate": 1.118001572189711e-05, "loss": 0.8646, "step": 88890 }, { "epoch": 7.76, "learning_rate": 1.1175648528255743e-05, "loss": 0.9937, "step": 88900 }, { "epoch": 7.77, "learning_rate": 1.1171281334614378e-05, "loss": 0.8227, "step": 88910 }, { "epoch": 7.77, "learning_rate": 1.1166914140973011e-05, "loss": 0.8213, "step": 88920 }, { "epoch": 7.77, "learning_rate": 1.1162546947331646e-05, "loss": 1.0285, "step": 88930 }, { "epoch": 7.77, "learning_rate": 1.115817975369028e-05, "loss": 0.7389, "step": 88940 }, { "epoch": 7.77, "learning_rate": 1.1153812560048914e-05, "loss": 0.7841, "step": 88950 }, { "epoch": 7.77, "learning_rate": 1.1149445366407547e-05, "loss": 0.8089, "step": 88960 }, { "epoch": 7.77, "learning_rate": 1.114507817276618e-05, "loss": 0.9032, "step": 88970 }, { "epoch": 7.77, "learning_rate": 1.1140710979124815e-05, "loss": 0.9376, "step": 88980 }, { "epoch": 7.77, "learning_rate": 1.1136343785483449e-05, "loss": 0.9137, "step": 88990 }, { "epoch": 7.77, "learning_rate": 1.1131976591842084e-05, "loss": 0.7653, "step": 89000 }, { "epoch": 7.77, "learning_rate": 1.1127609398200717e-05, "loss": 0.7867, "step": 89010 }, { "epoch": 7.78, "learning_rate": 1.112324220455935e-05, "loss": 0.8513, "step": 89020 }, { "epoch": 7.78, "learning_rate": 1.1118875010917985e-05, "loss": 0.8384, "step": 89030 }, { "epoch": 7.78, "learning_rate": 1.111450781727662e-05, "loss": 0.8945, "step": 89040 }, { "epoch": 7.78, "learning_rate": 1.1110140623635253e-05, "loss": 0.7707, "step": 89050 }, { "epoch": 7.78, "learning_rate": 1.1105773429993886e-05, "loss": 0.7619, "step": 89060 }, { "epoch": 7.78, "learning_rate": 1.110140623635252e-05, "loss": 0.8466, "step": 89070 }, { "epoch": 7.78, "learning_rate": 1.1097039042711154e-05, "loss": 0.9133, "step": 89080 }, { "epoch": 7.78, "learning_rate": 1.1092671849069789e-05, "loss": 0.8505, "step": 89090 }, { "epoch": 7.78, "learning_rate": 1.1088304655428422e-05, "loss": 0.9353, "step": 89100 }, { "epoch": 7.78, "learning_rate": 1.1083937461787056e-05, "loss": 0.8984, "step": 89110 }, { "epoch": 7.78, "learning_rate": 1.107957026814569e-05, "loss": 0.9588, "step": 89120 }, { "epoch": 7.78, "learning_rate": 1.1075203074504324e-05, "loss": 0.8374, "step": 89130 }, { "epoch": 7.79, "learning_rate": 1.1070835880862959e-05, "loss": 0.8169, "step": 89140 }, { "epoch": 7.79, "learning_rate": 1.1066468687221592e-05, "loss": 0.8069, "step": 89150 }, { "epoch": 7.79, "learning_rate": 1.1062101493580227e-05, "loss": 0.8892, "step": 89160 }, { "epoch": 7.79, "learning_rate": 1.105773429993886e-05, "loss": 0.8958, "step": 89170 }, { "epoch": 7.79, "learning_rate": 1.1053367106297493e-05, "loss": 0.9282, "step": 89180 }, { "epoch": 7.79, "learning_rate": 1.1048999912656128e-05, "loss": 0.9285, "step": 89190 }, { "epoch": 7.79, "learning_rate": 1.1044632719014763e-05, "loss": 0.8774, "step": 89200 }, { "epoch": 7.79, "learning_rate": 1.1040265525373396e-05, "loss": 0.8764, "step": 89210 }, { "epoch": 7.79, "learning_rate": 1.103589833173203e-05, "loss": 0.9403, "step": 89220 }, { "epoch": 7.79, "learning_rate": 1.1031531138090662e-05, "loss": 0.8622, "step": 89230 }, { "epoch": 7.79, "learning_rate": 1.1027163944449297e-05, "loss": 0.8529, "step": 89240 }, { "epoch": 7.8, "learning_rate": 1.1022796750807932e-05, "loss": 0.766, "step": 89250 }, { "epoch": 7.8, "learning_rate": 1.1018429557166565e-05, "loss": 0.9419, "step": 89260 }, { "epoch": 7.8, "learning_rate": 1.1014062363525199e-05, "loss": 0.8218, "step": 89270 }, { "epoch": 7.8, "learning_rate": 1.1009695169883834e-05, "loss": 0.9284, "step": 89280 }, { "epoch": 7.8, "learning_rate": 1.1005327976242467e-05, "loss": 0.8526, "step": 89290 }, { "epoch": 7.8, "learning_rate": 1.1000960782601102e-05, "loss": 0.9226, "step": 89300 }, { "epoch": 7.8, "learning_rate": 1.0996593588959735e-05, "loss": 0.8613, "step": 89310 }, { "epoch": 7.8, "learning_rate": 1.099222639531837e-05, "loss": 0.9473, "step": 89320 }, { "epoch": 7.8, "learning_rate": 1.0987859201677003e-05, "loss": 0.9129, "step": 89330 }, { "epoch": 7.8, "learning_rate": 1.0983492008035636e-05, "loss": 0.8756, "step": 89340 }, { "epoch": 7.8, "learning_rate": 1.0979124814394271e-05, "loss": 0.8903, "step": 89350 }, { "epoch": 7.81, "learning_rate": 1.0974757620752904e-05, "loss": 0.8289, "step": 89360 }, { "epoch": 7.81, "learning_rate": 1.0970390427111539e-05, "loss": 0.8875, "step": 89370 }, { "epoch": 7.81, "learning_rate": 1.0966023233470172e-05, "loss": 0.827, "step": 89380 }, { "epoch": 7.81, "learning_rate": 1.0961656039828806e-05, "loss": 0.8074, "step": 89390 }, { "epoch": 7.81, "learning_rate": 1.095728884618744e-05, "loss": 0.9072, "step": 89400 }, { "epoch": 7.81, "learning_rate": 1.0952921652546075e-05, "loss": 0.8447, "step": 89410 }, { "epoch": 7.81, "learning_rate": 1.0948554458904709e-05, "loss": 0.7952, "step": 89420 }, { "epoch": 7.81, "learning_rate": 1.0944187265263342e-05, "loss": 0.8533, "step": 89430 }, { "epoch": 7.81, "learning_rate": 1.0939820071621975e-05, "loss": 0.8887, "step": 89440 }, { "epoch": 7.81, "learning_rate": 1.093545287798061e-05, "loss": 0.8511, "step": 89450 }, { "epoch": 7.81, "learning_rate": 1.0931085684339245e-05, "loss": 0.7377, "step": 89460 }, { "epoch": 7.81, "learning_rate": 1.0926718490697878e-05, "loss": 0.8044, "step": 89470 }, { "epoch": 7.82, "learning_rate": 1.0922351297056511e-05, "loss": 0.8344, "step": 89480 }, { "epoch": 7.82, "learning_rate": 1.0917984103415146e-05, "loss": 0.8518, "step": 89490 }, { "epoch": 7.82, "learning_rate": 1.091361690977378e-05, "loss": 0.9273, "step": 89500 }, { "epoch": 7.82, "learning_rate": 1.0909249716132414e-05, "loss": 0.9575, "step": 89510 }, { "epoch": 7.82, "learning_rate": 1.0904882522491047e-05, "loss": 0.9561, "step": 89520 }, { "epoch": 7.82, "learning_rate": 1.0900515328849682e-05, "loss": 0.8519, "step": 89530 }, { "epoch": 7.82, "learning_rate": 1.0896148135208315e-05, "loss": 0.8006, "step": 89540 }, { "epoch": 7.82, "learning_rate": 1.0891780941566949e-05, "loss": 0.9219, "step": 89550 }, { "epoch": 7.82, "learning_rate": 1.0887413747925584e-05, "loss": 0.8512, "step": 89560 }, { "epoch": 7.82, "learning_rate": 1.0883046554284218e-05, "loss": 0.9042, "step": 89570 }, { "epoch": 7.82, "learning_rate": 1.0878679360642852e-05, "loss": 1.0375, "step": 89580 }, { "epoch": 7.83, "learning_rate": 1.0874312167001485e-05, "loss": 1.0311, "step": 89590 }, { "epoch": 7.83, "learning_rate": 1.0869944973360118e-05, "loss": 0.9108, "step": 89600 }, { "epoch": 7.83, "learning_rate": 1.0865577779718755e-05, "loss": 0.8159, "step": 89610 }, { "epoch": 7.83, "learning_rate": 1.0861210586077388e-05, "loss": 0.888, "step": 89620 }, { "epoch": 7.83, "learning_rate": 1.0856843392436021e-05, "loss": 0.8346, "step": 89630 }, { "epoch": 7.83, "learning_rate": 1.0852476198794654e-05, "loss": 0.931, "step": 89640 }, { "epoch": 7.83, "learning_rate": 1.0848109005153289e-05, "loss": 0.8591, "step": 89650 }, { "epoch": 7.83, "learning_rate": 1.0843741811511924e-05, "loss": 0.9058, "step": 89660 }, { "epoch": 7.83, "learning_rate": 1.0839374617870557e-05, "loss": 0.85, "step": 89670 }, { "epoch": 7.83, "learning_rate": 1.083500742422919e-05, "loss": 0.7735, "step": 89680 }, { "epoch": 7.83, "learning_rate": 1.0830640230587825e-05, "loss": 1.027, "step": 89690 }, { "epoch": 7.83, "learning_rate": 1.0826273036946459e-05, "loss": 0.8869, "step": 89700 }, { "epoch": 7.84, "learning_rate": 1.0821905843305093e-05, "loss": 0.9471, "step": 89710 }, { "epoch": 7.84, "learning_rate": 1.0817538649663727e-05, "loss": 0.8008, "step": 89720 }, { "epoch": 7.84, "learning_rate": 1.081317145602236e-05, "loss": 0.8493, "step": 89730 }, { "epoch": 7.84, "learning_rate": 1.0808804262380995e-05, "loss": 0.7998, "step": 89740 }, { "epoch": 7.84, "learning_rate": 1.0804437068739628e-05, "loss": 0.7978, "step": 89750 }, { "epoch": 7.84, "learning_rate": 1.0800069875098263e-05, "loss": 0.8392, "step": 89760 }, { "epoch": 7.84, "learning_rate": 1.0795702681456896e-05, "loss": 0.9698, "step": 89770 }, { "epoch": 7.84, "learning_rate": 1.0791335487815531e-05, "loss": 0.8337, "step": 89780 }, { "epoch": 7.84, "learning_rate": 1.0786968294174164e-05, "loss": 0.911, "step": 89790 }, { "epoch": 7.84, "learning_rate": 1.0782601100532797e-05, "loss": 0.7266, "step": 89800 }, { "epoch": 7.84, "learning_rate": 1.0778233906891432e-05, "loss": 0.8812, "step": 89810 }, { "epoch": 7.85, "learning_rate": 1.0773866713250067e-05, "loss": 0.8224, "step": 89820 }, { "epoch": 7.85, "learning_rate": 1.07694995196087e-05, "loss": 0.9821, "step": 89830 }, { "epoch": 7.85, "learning_rate": 1.0765132325967334e-05, "loss": 0.9171, "step": 89840 }, { "epoch": 7.85, "learning_rate": 1.0760765132325967e-05, "loss": 0.7489, "step": 89850 }, { "epoch": 7.85, "learning_rate": 1.0756397938684602e-05, "loss": 0.9384, "step": 89860 }, { "epoch": 7.85, "learning_rate": 1.0752030745043237e-05, "loss": 1.0107, "step": 89870 }, { "epoch": 7.85, "learning_rate": 1.074766355140187e-05, "loss": 0.7912, "step": 89880 }, { "epoch": 7.85, "learning_rate": 1.0743296357760503e-05, "loss": 0.8743, "step": 89890 }, { "epoch": 7.85, "learning_rate": 1.0738929164119138e-05, "loss": 0.8277, "step": 89900 }, { "epoch": 7.85, "learning_rate": 1.0734561970477771e-05, "loss": 0.8083, "step": 89910 }, { "epoch": 7.85, "learning_rate": 1.0730194776836406e-05, "loss": 0.8241, "step": 89920 }, { "epoch": 7.85, "learning_rate": 1.072582758319504e-05, "loss": 0.9841, "step": 89930 }, { "epoch": 7.86, "learning_rate": 1.0721460389553674e-05, "loss": 0.8739, "step": 89940 }, { "epoch": 7.86, "learning_rate": 1.0717093195912307e-05, "loss": 1.1026, "step": 89950 }, { "epoch": 7.86, "learning_rate": 1.071272600227094e-05, "loss": 0.8715, "step": 89960 }, { "epoch": 7.86, "learning_rate": 1.0708358808629575e-05, "loss": 0.8374, "step": 89970 }, { "epoch": 7.86, "learning_rate": 1.070399161498821e-05, "loss": 0.9358, "step": 89980 }, { "epoch": 7.86, "learning_rate": 1.0699624421346843e-05, "loss": 0.919, "step": 89990 }, { "epoch": 7.86, "learning_rate": 1.0695257227705477e-05, "loss": 0.9656, "step": 90000 }, { "epoch": 7.86, "eval_accuracy": 0.5667682028102584, "eval_loss": 0.8916383385658264, "eval_runtime": 84.0851, "eval_samples_per_second": 121.032, "eval_steps_per_second": 15.139, "step": 90000 } ], "logging_steps": 10, "max_steps": 114490, "num_train_epochs": 10, "save_steps": 5000, "total_flos": 1.900935170811556e+17, "trial_name": null, "trial_params": null }