{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9986835534976621, "global_step": 22000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.999092105860457e-05, "loss": 2.4805, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9981842117209137e-05, "loss": 2.4355, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.99727631758137e-05, "loss": 1.317, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.996368423441827e-05, "loss": 1.5764, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.9954605293022836e-05, "loss": 2.4654, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9945526351627404e-05, "loss": 3.0094, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.993644741023197e-05, "loss": 1.9016, "step": 70 }, { "epoch": 0.0, "learning_rate": 1.9927368468836535e-05, "loss": 2.7273, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.9918289527441103e-05, "loss": 2.0789, "step": 90 }, { "epoch": 0.0, "learning_rate": 1.9909210586045667e-05, "loss": 1.8132, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.9900131644650234e-05, "loss": 2.7362, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.9891052703254802e-05, "loss": 1.1584, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.9881973761859366e-05, "loss": 1.8474, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.9872894820463934e-05, "loss": 1.2248, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.98638158790685e-05, "loss": 1.8344, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.985473693767307e-05, "loss": 1.5881, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.9845657996277636e-05, "loss": 1.9684, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.98365790548822e-05, "loss": 1.9283, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.9827500113486768e-05, "loss": 1.776, "step": 190 }, { "epoch": 0.01, "learning_rate": 1.9818421172091336e-05, "loss": 1.5498, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.9809342230695903e-05, "loss": 2.0404, "step": 210 }, { "epoch": 0.01, "learning_rate": 1.980026328930047e-05, "loss": 2.0609, "step": 220 }, { "epoch": 0.01, "learning_rate": 1.9791184347905038e-05, "loss": 1.2291, "step": 230 }, { "epoch": 0.01, "learning_rate": 1.9782105406509602e-05, "loss": 0.6943, "step": 240 }, { "epoch": 0.01, "learning_rate": 1.977302646511417e-05, "loss": 1.5242, "step": 250 }, { "epoch": 0.01, "learning_rate": 1.9763947523718737e-05, "loss": 1.5855, "step": 260 }, { "epoch": 0.01, "learning_rate": 1.9754868582323305e-05, "loss": 1.6128, "step": 270 }, { "epoch": 0.01, "learning_rate": 1.974578964092787e-05, "loss": 1.4307, "step": 280 }, { "epoch": 0.01, "learning_rate": 1.9736710699532437e-05, "loss": 1.402, "step": 290 }, { "epoch": 0.01, "learning_rate": 1.9727631758137e-05, "loss": 1.4633, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.971855281674157e-05, "loss": 1.0023, "step": 310 }, { "epoch": 0.01, "learning_rate": 1.9709473875346136e-05, "loss": 1.2508, "step": 320 }, { "epoch": 0.01, "learning_rate": 1.9700394933950703e-05, "loss": 1.4191, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.9691315992555268e-05, "loss": 1.0203, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.9682237051159835e-05, "loss": 1.2482, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.9673158109764403e-05, "loss": 1.108, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.966407916836897e-05, "loss": 0.8581, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.9655000226973538e-05, "loss": 1.307, "step": 380 }, { "epoch": 0.02, "learning_rate": 1.9645921285578102e-05, "loss": 1.0133, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.963684234418267e-05, "loss": 0.9578, "step": 400 }, { "epoch": 0.02, "learning_rate": 1.9627763402787237e-05, "loss": 1.2262, "step": 410 }, { "epoch": 0.02, "learning_rate": 1.9618684461391805e-05, "loss": 1.075, "step": 420 }, { "epoch": 0.02, "learning_rate": 1.9609605519996372e-05, "loss": 1.1379, "step": 430 }, { "epoch": 0.02, "learning_rate": 1.9600526578600936e-05, "loss": 1.1068, "step": 440 }, { "epoch": 0.02, "learning_rate": 1.9591447637205504e-05, "loss": 0.9898, "step": 450 }, { "epoch": 0.02, "learning_rate": 1.958236869581007e-05, "loss": 0.7063, "step": 460 }, { "epoch": 0.02, "learning_rate": 1.9573289754414636e-05, "loss": 1.0111, "step": 470 }, { "epoch": 0.02, "learning_rate": 1.9564210813019203e-05, "loss": 0.9605, "step": 480 }, { "epoch": 0.02, "learning_rate": 1.955513187162377e-05, "loss": 0.8791, "step": 490 }, { "epoch": 0.02, "learning_rate": 1.9546052930228335e-05, "loss": 0.8611, "step": 500 }, { "epoch": 0.02, "eval_accuracy": 0.5282472235634958, "eval_loss": 0.9476923942565918, "eval_runtime": 73.7994, "eval_samples_per_second": 56.125, "eval_steps_per_second": 14.038, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9536973988832902e-05, "loss": 0.7687, "step": 510 }, { "epoch": 0.02, "learning_rate": 1.952789504743747e-05, "loss": 1.0031, "step": 520 }, { "epoch": 0.02, "learning_rate": 1.9518816106042037e-05, "loss": 1.0047, "step": 530 }, { "epoch": 0.02, "learning_rate": 1.95097371646466e-05, "loss": 1.198, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.950065822325117e-05, "loss": 1.1266, "step": 550 }, { "epoch": 0.03, "learning_rate": 1.9491579281855737e-05, "loss": 0.9148, "step": 560 }, { "epoch": 0.03, "learning_rate": 1.9482500340460304e-05, "loss": 0.9363, "step": 570 }, { "epoch": 0.03, "learning_rate": 1.9473421399064872e-05, "loss": 1.1197, "step": 580 }, { "epoch": 0.03, "learning_rate": 1.946434245766944e-05, "loss": 0.8326, "step": 590 }, { "epoch": 0.03, "learning_rate": 1.9455263516274003e-05, "loss": 1.0715, "step": 600 }, { "epoch": 0.03, "learning_rate": 1.944618457487857e-05, "loss": 0.8307, "step": 610 }, { "epoch": 0.03, "learning_rate": 1.943710563348314e-05, "loss": 0.6955, "step": 620 }, { "epoch": 0.03, "learning_rate": 1.9428026692087706e-05, "loss": 0.9908, "step": 630 }, { "epoch": 0.03, "learning_rate": 1.9418947750692274e-05, "loss": 0.7029, "step": 640 }, { "epoch": 0.03, "learning_rate": 1.9409868809296838e-05, "loss": 0.8758, "step": 650 }, { "epoch": 0.03, "learning_rate": 1.9400789867901405e-05, "loss": 0.858, "step": 660 }, { "epoch": 0.03, "learning_rate": 1.939171092650597e-05, "loss": 0.6072, "step": 670 }, { "epoch": 0.03, "learning_rate": 1.9382631985110537e-05, "loss": 0.7605, "step": 680 }, { "epoch": 0.03, "learning_rate": 1.9373553043715105e-05, "loss": 0.6867, "step": 690 }, { "epoch": 0.03, "learning_rate": 1.936447410231967e-05, "loss": 0.8187, "step": 700 }, { "epoch": 0.03, "learning_rate": 1.9355395160924236e-05, "loss": 0.7312, "step": 710 }, { "epoch": 0.03, "learning_rate": 1.9346316219528804e-05, "loss": 0.7791, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.933723727813337e-05, "loss": 0.8156, "step": 730 }, { "epoch": 0.03, "learning_rate": 1.932815833673794e-05, "loss": 0.9059, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.9319079395342503e-05, "loss": 0.7408, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.931000045394707e-05, "loss": 0.8867, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.9300921512551638e-05, "loss": 0.6566, "step": 770 }, { "epoch": 0.04, "learning_rate": 1.9291842571156206e-05, "loss": 0.7961, "step": 780 }, { "epoch": 0.04, "learning_rate": 1.9282763629760773e-05, "loss": 0.7789, "step": 790 }, { "epoch": 0.04, "learning_rate": 1.9273684688365337e-05, "loss": 0.9086, "step": 800 }, { "epoch": 0.04, "learning_rate": 1.9264605746969905e-05, "loss": 0.7547, "step": 810 }, { "epoch": 0.04, "learning_rate": 1.9255526805574472e-05, "loss": 0.7371, "step": 820 }, { "epoch": 0.04, "learning_rate": 1.924644786417904e-05, "loss": 0.6797, "step": 830 }, { "epoch": 0.04, "learning_rate": 1.9237368922783604e-05, "loss": 0.6729, "step": 840 }, { "epoch": 0.04, "learning_rate": 1.922828998138817e-05, "loss": 0.6578, "step": 850 }, { "epoch": 0.04, "learning_rate": 1.921921103999274e-05, "loss": 0.7986, "step": 860 }, { "epoch": 0.04, "learning_rate": 1.9210132098597303e-05, "loss": 0.691, "step": 870 }, { "epoch": 0.04, "learning_rate": 1.920105315720187e-05, "loss": 0.7988, "step": 880 }, { "epoch": 0.04, "learning_rate": 1.919197421580644e-05, "loss": 0.7607, "step": 890 }, { "epoch": 0.04, "learning_rate": 1.9182895274411003e-05, "loss": 0.6561, "step": 900 }, { "epoch": 0.04, "learning_rate": 1.917381633301557e-05, "loss": 0.71, "step": 910 }, { "epoch": 0.04, "learning_rate": 1.9164737391620138e-05, "loss": 0.7926, "step": 920 }, { "epoch": 0.04, "learning_rate": 1.9155658450224705e-05, "loss": 0.8121, "step": 930 }, { "epoch": 0.04, "learning_rate": 1.9146579508829273e-05, "loss": 0.6301, "step": 940 }, { "epoch": 0.04, "learning_rate": 1.913750056743384e-05, "loss": 0.6631, "step": 950 }, { "epoch": 0.04, "learning_rate": 1.9128421626038404e-05, "loss": 0.774, "step": 960 }, { "epoch": 0.04, "learning_rate": 1.9119342684642972e-05, "loss": 0.6559, "step": 970 }, { "epoch": 0.04, "learning_rate": 1.911026374324754e-05, "loss": 0.7158, "step": 980 }, { "epoch": 0.04, "learning_rate": 1.9101184801852107e-05, "loss": 0.8125, "step": 990 }, { "epoch": 0.05, "learning_rate": 1.9092105860456675e-05, "loss": 0.8473, "step": 1000 }, { "epoch": 0.05, "eval_accuracy": 0.5444229840656688, "eval_loss": 0.7574031949043274, "eval_runtime": 73.6682, "eval_samples_per_second": 56.225, "eval_steps_per_second": 14.063, "step": 1000 }, { "epoch": 0.05, "learning_rate": 1.908302691906124e-05, "loss": 0.6639, "step": 1010 }, { "epoch": 0.05, "learning_rate": 1.9073947977665806e-05, "loss": 0.7871, "step": 1020 }, { "epoch": 0.05, "learning_rate": 1.9064869036270374e-05, "loss": 0.651, "step": 1030 }, { "epoch": 0.05, "learning_rate": 1.9055790094874938e-05, "loss": 0.7703, "step": 1040 }, { "epoch": 0.05, "learning_rate": 1.9046711153479506e-05, "loss": 0.7203, "step": 1050 }, { "epoch": 0.05, "learning_rate": 1.9037632212084073e-05, "loss": 0.7766, "step": 1060 }, { "epoch": 0.05, "learning_rate": 1.9028553270688637e-05, "loss": 0.6488, "step": 1070 }, { "epoch": 0.05, "learning_rate": 1.9019474329293205e-05, "loss": 0.9973, "step": 1080 }, { "epoch": 0.05, "learning_rate": 1.9010395387897772e-05, "loss": 0.7613, "step": 1090 }, { "epoch": 0.05, "learning_rate": 1.900131644650234e-05, "loss": 0.6891, "step": 1100 }, { "epoch": 0.05, "learning_rate": 1.8992237505106904e-05, "loss": 0.6699, "step": 1110 }, { "epoch": 0.05, "learning_rate": 1.898315856371147e-05, "loss": 0.6811, "step": 1120 }, { "epoch": 0.05, "learning_rate": 1.897407962231604e-05, "loss": 0.8492, "step": 1130 }, { "epoch": 0.05, "learning_rate": 1.8965000680920607e-05, "loss": 0.7957, "step": 1140 }, { "epoch": 0.05, "learning_rate": 1.8955921739525174e-05, "loss": 0.759, "step": 1150 }, { "epoch": 0.05, "learning_rate": 1.894684279812974e-05, "loss": 0.8492, "step": 1160 }, { "epoch": 0.05, "learning_rate": 1.8937763856734306e-05, "loss": 0.7164, "step": 1170 }, { "epoch": 0.05, "learning_rate": 1.8928684915338873e-05, "loss": 0.6832, "step": 1180 }, { "epoch": 0.05, "learning_rate": 1.891960597394344e-05, "loss": 0.793, "step": 1190 }, { "epoch": 0.05, "learning_rate": 1.891052703254801e-05, "loss": 0.725, "step": 1200 }, { "epoch": 0.05, "learning_rate": 1.8901448091152573e-05, "loss": 0.6547, "step": 1210 }, { "epoch": 0.06, "learning_rate": 1.889236914975714e-05, "loss": 0.6254, "step": 1220 }, { "epoch": 0.06, "learning_rate": 1.8883290208361708e-05, "loss": 0.7918, "step": 1230 }, { "epoch": 0.06, "learning_rate": 1.8874211266966272e-05, "loss": 0.8402, "step": 1240 }, { "epoch": 0.06, "learning_rate": 1.886513232557084e-05, "loss": 0.6934, "step": 1250 }, { "epoch": 0.06, "learning_rate": 1.8856053384175407e-05, "loss": 0.691, "step": 1260 }, { "epoch": 0.06, "learning_rate": 1.884697444277997e-05, "loss": 0.6693, "step": 1270 }, { "epoch": 0.06, "learning_rate": 1.883789550138454e-05, "loss": 0.827, "step": 1280 }, { "epoch": 0.06, "learning_rate": 1.8828816559989106e-05, "loss": 0.6457, "step": 1290 }, { "epoch": 0.06, "learning_rate": 1.8819737618593674e-05, "loss": 0.6223, "step": 1300 }, { "epoch": 0.06, "learning_rate": 1.8810658677198238e-05, "loss": 0.7271, "step": 1310 }, { "epoch": 0.06, "learning_rate": 1.8801579735802806e-05, "loss": 0.85, "step": 1320 }, { "epoch": 0.06, "learning_rate": 1.8792500794407373e-05, "loss": 0.7082, "step": 1330 }, { "epoch": 0.06, "learning_rate": 1.878342185301194e-05, "loss": 0.7145, "step": 1340 }, { "epoch": 0.06, "learning_rate": 1.8774342911616508e-05, "loss": 0.8406, "step": 1350 }, { "epoch": 0.06, "learning_rate": 1.8765263970221076e-05, "loss": 0.6855, "step": 1360 }, { "epoch": 0.06, "learning_rate": 1.875618502882564e-05, "loss": 0.6941, "step": 1370 }, { "epoch": 0.06, "learning_rate": 1.8747106087430207e-05, "loss": 0.6742, "step": 1380 }, { "epoch": 0.06, "learning_rate": 1.8738027146034775e-05, "loss": 0.6928, "step": 1390 }, { "epoch": 0.06, "learning_rate": 1.8728948204639342e-05, "loss": 0.7762, "step": 1400 }, { "epoch": 0.06, "learning_rate": 1.8719869263243907e-05, "loss": 0.8305, "step": 1410 }, { "epoch": 0.06, "learning_rate": 1.8710790321848474e-05, "loss": 0.6861, "step": 1420 }, { "epoch": 0.06, "learning_rate": 1.8701711380453042e-05, "loss": 0.7648, "step": 1430 }, { "epoch": 0.07, "learning_rate": 1.8692632439057606e-05, "loss": 0.7822, "step": 1440 }, { "epoch": 0.07, "learning_rate": 1.8683553497662173e-05, "loss": 0.7717, "step": 1450 }, { "epoch": 0.07, "learning_rate": 1.867447455626674e-05, "loss": 0.6504, "step": 1460 }, { "epoch": 0.07, "learning_rate": 1.8665395614871305e-05, "loss": 0.7363, "step": 1470 }, { "epoch": 0.07, "learning_rate": 1.8656316673475873e-05, "loss": 0.6836, "step": 1480 }, { "epoch": 0.07, "learning_rate": 1.864723773208044e-05, "loss": 0.8811, "step": 1490 }, { "epoch": 0.07, "learning_rate": 1.8638158790685008e-05, "loss": 0.7086, "step": 1500 }, { "epoch": 0.07, "eval_accuracy": 0.5596330275229358, "eval_loss": 0.7176767587661743, "eval_runtime": 74.531, "eval_samples_per_second": 55.574, "eval_steps_per_second": 13.9, "step": 1500 }, { "epoch": 0.07, "learning_rate": 1.8629079849289575e-05, "loss": 0.6834, "step": 1510 }, { "epoch": 0.07, "learning_rate": 1.862000090789414e-05, "loss": 0.7777, "step": 1520 }, { "epoch": 0.07, "learning_rate": 1.8610921966498707e-05, "loss": 0.6586, "step": 1530 }, { "epoch": 0.07, "learning_rate": 1.8601843025103275e-05, "loss": 0.7246, "step": 1540 }, { "epoch": 0.07, "learning_rate": 1.8592764083707842e-05, "loss": 0.7203, "step": 1550 }, { "epoch": 0.07, "learning_rate": 1.858368514231241e-05, "loss": 0.7113, "step": 1560 }, { "epoch": 0.07, "learning_rate": 1.8574606200916974e-05, "loss": 0.7828, "step": 1570 }, { "epoch": 0.07, "learning_rate": 1.856552725952154e-05, "loss": 0.7518, "step": 1580 }, { "epoch": 0.07, "learning_rate": 1.855644831812611e-05, "loss": 0.693, "step": 1590 }, { "epoch": 0.07, "learning_rate": 1.8547369376730676e-05, "loss": 0.7, "step": 1600 }, { "epoch": 0.07, "learning_rate": 1.853829043533524e-05, "loss": 0.65, "step": 1610 }, { "epoch": 0.07, "learning_rate": 1.8529211493939808e-05, "loss": 0.6734, "step": 1620 }, { "epoch": 0.07, "learning_rate": 1.8520132552544376e-05, "loss": 0.7648, "step": 1630 }, { "epoch": 0.07, "learning_rate": 1.851105361114894e-05, "loss": 0.6742, "step": 1640 }, { "epoch": 0.07, "learning_rate": 1.8501974669753507e-05, "loss": 0.7734, "step": 1650 }, { "epoch": 0.08, "learning_rate": 1.8492895728358075e-05, "loss": 0.7621, "step": 1660 }, { "epoch": 0.08, "learning_rate": 1.848381678696264e-05, "loss": 0.6535, "step": 1670 }, { "epoch": 0.08, "learning_rate": 1.8474737845567207e-05, "loss": 0.7879, "step": 1680 }, { "epoch": 0.08, "learning_rate": 1.8465658904171774e-05, "loss": 0.6699, "step": 1690 }, { "epoch": 0.08, "learning_rate": 1.845657996277634e-05, "loss": 0.6672, "step": 1700 }, { "epoch": 0.08, "learning_rate": 1.844750102138091e-05, "loss": 0.759, "step": 1710 }, { "epoch": 0.08, "learning_rate": 1.8438422079985477e-05, "loss": 0.7328, "step": 1720 }, { "epoch": 0.08, "learning_rate": 1.842934313859004e-05, "loss": 0.7902, "step": 1730 }, { "epoch": 0.08, "learning_rate": 1.842026419719461e-05, "loss": 0.7342, "step": 1740 }, { "epoch": 0.08, "learning_rate": 1.8411185255799176e-05, "loss": 0.7047, "step": 1750 }, { "epoch": 0.08, "learning_rate": 1.8402106314403744e-05, "loss": 0.7137, "step": 1760 }, { "epoch": 0.08, "learning_rate": 1.839302737300831e-05, "loss": 0.6871, "step": 1770 }, { "epoch": 0.08, "learning_rate": 1.8383948431612875e-05, "loss": 0.6461, "step": 1780 }, { "epoch": 0.08, "learning_rate": 1.8374869490217443e-05, "loss": 0.7117, "step": 1790 }, { "epoch": 0.08, "learning_rate": 1.836579054882201e-05, "loss": 0.7719, "step": 1800 }, { "epoch": 0.08, "learning_rate": 1.8356711607426574e-05, "loss": 0.65, "step": 1810 }, { "epoch": 0.08, "learning_rate": 1.8347632666031142e-05, "loss": 0.7473, "step": 1820 }, { "epoch": 0.08, "learning_rate": 1.833855372463571e-05, "loss": 0.7457, "step": 1830 }, { "epoch": 0.08, "learning_rate": 1.8329474783240274e-05, "loss": 0.7027, "step": 1840 }, { "epoch": 0.08, "learning_rate": 1.832039584184484e-05, "loss": 0.7066, "step": 1850 }, { "epoch": 0.08, "learning_rate": 1.831131690044941e-05, "loss": 0.7414, "step": 1860 }, { "epoch": 0.08, "learning_rate": 1.8302237959053976e-05, "loss": 0.7328, "step": 1870 }, { "epoch": 0.09, "learning_rate": 1.829315901765854e-05, "loss": 0.7215, "step": 1880 }, { "epoch": 0.09, "learning_rate": 1.8284080076263108e-05, "loss": 0.623, "step": 1890 }, { "epoch": 0.09, "learning_rate": 1.8275001134867676e-05, "loss": 0.7512, "step": 1900 }, { "epoch": 0.09, "learning_rate": 1.8265922193472243e-05, "loss": 0.7398, "step": 1910 }, { "epoch": 0.09, "learning_rate": 1.825684325207681e-05, "loss": 0.7441, "step": 1920 }, { "epoch": 0.09, "learning_rate": 1.8247764310681375e-05, "loss": 0.6504, "step": 1930 }, { "epoch": 0.09, "learning_rate": 1.8238685369285942e-05, "loss": 0.7625, "step": 1940 }, { "epoch": 0.09, "learning_rate": 1.822960642789051e-05, "loss": 0.668, "step": 1950 }, { "epoch": 0.09, "learning_rate": 1.8220527486495077e-05, "loss": 0.6961, "step": 1960 }, { "epoch": 0.09, "learning_rate": 1.8211448545099645e-05, "loss": 0.7477, "step": 1970 }, { "epoch": 0.09, "learning_rate": 1.820236960370421e-05, "loss": 0.7844, "step": 1980 }, { "epoch": 0.09, "learning_rate": 1.8193290662308777e-05, "loss": 0.6893, "step": 1990 }, { "epoch": 0.09, "learning_rate": 1.8184211720913344e-05, "loss": 0.7223, "step": 2000 }, { "epoch": 0.09, "eval_accuracy": 0.5642201834862385, "eval_loss": 0.7053582072257996, "eval_runtime": 73.9274, "eval_samples_per_second": 56.028, "eval_steps_per_second": 14.014, "step": 2000 }, { "epoch": 0.09, "learning_rate": 1.817513277951791e-05, "loss": 0.6996, "step": 2010 }, { "epoch": 0.09, "learning_rate": 1.8166053838122476e-05, "loss": 0.6939, "step": 2020 }, { "epoch": 0.09, "learning_rate": 1.8156974896727043e-05, "loss": 0.7418, "step": 2030 }, { "epoch": 0.09, "learning_rate": 1.8147895955331608e-05, "loss": 0.7295, "step": 2040 }, { "epoch": 0.09, "learning_rate": 1.8138817013936175e-05, "loss": 0.6215, "step": 2050 }, { "epoch": 0.09, "learning_rate": 1.8129738072540743e-05, "loss": 0.6334, "step": 2060 }, { "epoch": 0.09, "learning_rate": 1.812065913114531e-05, "loss": 0.7109, "step": 2070 }, { "epoch": 0.09, "learning_rate": 1.8111580189749878e-05, "loss": 0.6764, "step": 2080 }, { "epoch": 0.09, "learning_rate": 1.8102501248354442e-05, "loss": 0.6371, "step": 2090 }, { "epoch": 0.1, "learning_rate": 1.809342230695901e-05, "loss": 0.6674, "step": 2100 }, { "epoch": 0.1, "learning_rate": 1.8084343365563577e-05, "loss": 0.6689, "step": 2110 }, { "epoch": 0.1, "learning_rate": 1.8075264424168145e-05, "loss": 0.6523, "step": 2120 }, { "epoch": 0.1, "learning_rate": 1.8066185482772712e-05, "loss": 0.6008, "step": 2130 }, { "epoch": 0.1, "learning_rate": 1.8057106541377276e-05, "loss": 0.742, "step": 2140 }, { "epoch": 0.1, "learning_rate": 1.8048027599981844e-05, "loss": 0.7527, "step": 2150 }, { "epoch": 0.1, "learning_rate": 1.803894865858641e-05, "loss": 0.7352, "step": 2160 }, { "epoch": 0.1, "learning_rate": 1.802986971719098e-05, "loss": 0.6502, "step": 2170 }, { "epoch": 0.1, "learning_rate": 1.8020790775795543e-05, "loss": 0.7156, "step": 2180 }, { "epoch": 0.1, "learning_rate": 1.801171183440011e-05, "loss": 0.7008, "step": 2190 }, { "epoch": 0.1, "learning_rate": 1.8002632893004678e-05, "loss": 0.6953, "step": 2200 }, { "epoch": 0.1, "learning_rate": 1.7993553951609242e-05, "loss": 0.7641, "step": 2210 }, { "epoch": 0.1, "learning_rate": 1.798447501021381e-05, "loss": 0.7266, "step": 2220 }, { "epoch": 0.1, "learning_rate": 1.7975396068818377e-05, "loss": 0.657, "step": 2230 }, { "epoch": 0.1, "learning_rate": 1.796631712742294e-05, "loss": 0.6883, "step": 2240 }, { "epoch": 0.1, "learning_rate": 1.795723818602751e-05, "loss": 0.699, "step": 2250 }, { "epoch": 0.1, "learning_rate": 1.7948159244632077e-05, "loss": 0.6229, "step": 2260 }, { "epoch": 0.1, "learning_rate": 1.7939080303236644e-05, "loss": 0.6969, "step": 2270 }, { "epoch": 0.1, "learning_rate": 1.7930001361841212e-05, "loss": 0.7721, "step": 2280 }, { "epoch": 0.1, "learning_rate": 1.7920922420445776e-05, "loss": 0.7125, "step": 2290 }, { "epoch": 0.1, "learning_rate": 1.7911843479050343e-05, "loss": 0.7691, "step": 2300 }, { "epoch": 0.1, "learning_rate": 1.790276453765491e-05, "loss": 0.7262, "step": 2310 }, { "epoch": 0.11, "learning_rate": 1.789368559625948e-05, "loss": 0.7363, "step": 2320 }, { "epoch": 0.11, "learning_rate": 1.7884606654864046e-05, "loss": 0.6568, "step": 2330 }, { "epoch": 0.11, "learning_rate": 1.787552771346861e-05, "loss": 0.6848, "step": 2340 }, { "epoch": 0.11, "learning_rate": 1.7866448772073178e-05, "loss": 0.623, "step": 2350 }, { "epoch": 0.11, "learning_rate": 1.7857369830677745e-05, "loss": 0.7602, "step": 2360 }, { "epoch": 0.11, "learning_rate": 1.7848290889282313e-05, "loss": 0.5975, "step": 2370 }, { "epoch": 0.11, "learning_rate": 1.7839211947886877e-05, "loss": 0.6773, "step": 2380 }, { "epoch": 0.11, "learning_rate": 1.7830133006491445e-05, "loss": 0.7605, "step": 2390 }, { "epoch": 0.11, "learning_rate": 1.7821054065096012e-05, "loss": 0.7703, "step": 2400 }, { "epoch": 0.11, "learning_rate": 1.7811975123700576e-05, "loss": 0.7121, "step": 2410 }, { "epoch": 0.11, "learning_rate": 1.7802896182305144e-05, "loss": 0.7508, "step": 2420 }, { "epoch": 0.11, "learning_rate": 1.779381724090971e-05, "loss": 0.6875, "step": 2430 }, { "epoch": 0.11, "learning_rate": 1.7784738299514275e-05, "loss": 0.7035, "step": 2440 }, { "epoch": 0.11, "learning_rate": 1.7775659358118843e-05, "loss": 0.7422, "step": 2450 }, { "epoch": 0.11, "learning_rate": 1.776658041672341e-05, "loss": 0.6859, "step": 2460 }, { "epoch": 0.11, "learning_rate": 1.7757501475327978e-05, "loss": 0.7377, "step": 2470 }, { "epoch": 0.11, "learning_rate": 1.7748422533932546e-05, "loss": 0.7625, "step": 2480 }, { "epoch": 0.11, "learning_rate": 1.7739343592537113e-05, "loss": 0.6867, "step": 2490 }, { "epoch": 0.11, "learning_rate": 1.7730264651141677e-05, "loss": 0.7092, "step": 2500 }, { "epoch": 0.11, "eval_accuracy": 0.5864316755190729, "eval_loss": 0.69435054063797, "eval_runtime": 74.0387, "eval_samples_per_second": 55.944, "eval_steps_per_second": 13.993, "step": 2500 }, { "epoch": 0.11, "learning_rate": 1.7721185709746245e-05, "loss": 0.7135, "step": 2510 }, { "epoch": 0.11, "learning_rate": 1.7712106768350812e-05, "loss": 0.6902, "step": 2520 }, { "epoch": 0.11, "learning_rate": 1.770302782695538e-05, "loss": 0.6463, "step": 2530 }, { "epoch": 0.12, "learning_rate": 1.7693948885559948e-05, "loss": 0.7086, "step": 2540 }, { "epoch": 0.12, "learning_rate": 1.768486994416451e-05, "loss": 0.743, "step": 2550 }, { "epoch": 0.12, "learning_rate": 1.767579100276908e-05, "loss": 0.727, "step": 2560 }, { "epoch": 0.12, "learning_rate": 1.7666712061373647e-05, "loss": 0.7523, "step": 2570 }, { "epoch": 0.12, "learning_rate": 1.765763311997821e-05, "loss": 0.6906, "step": 2580 }, { "epoch": 0.12, "learning_rate": 1.764855417858278e-05, "loss": 0.7262, "step": 2590 }, { "epoch": 0.12, "learning_rate": 1.7639475237187346e-05, "loss": 0.6633, "step": 2600 }, { "epoch": 0.12, "learning_rate": 1.763039629579191e-05, "loss": 0.6121, "step": 2610 }, { "epoch": 0.12, "learning_rate": 1.7621317354396478e-05, "loss": 0.6309, "step": 2620 }, { "epoch": 0.12, "learning_rate": 1.7612238413001045e-05, "loss": 0.6721, "step": 2630 }, { "epoch": 0.12, "learning_rate": 1.7603159471605613e-05, "loss": 0.6562, "step": 2640 }, { "epoch": 0.12, "learning_rate": 1.7594080530210177e-05, "loss": 0.6633, "step": 2650 }, { "epoch": 0.12, "learning_rate": 1.7585001588814745e-05, "loss": 0.7188, "step": 2660 }, { "epoch": 0.12, "learning_rate": 1.7575922647419312e-05, "loss": 0.7297, "step": 2670 }, { "epoch": 0.12, "learning_rate": 1.756684370602388e-05, "loss": 0.6438, "step": 2680 }, { "epoch": 0.12, "learning_rate": 1.7557764764628447e-05, "loss": 0.6961, "step": 2690 }, { "epoch": 0.12, "learning_rate": 1.754868582323301e-05, "loss": 0.6992, "step": 2700 }, { "epoch": 0.12, "learning_rate": 1.753960688183758e-05, "loss": 0.5748, "step": 2710 }, { "epoch": 0.12, "learning_rate": 1.7530527940442146e-05, "loss": 0.5975, "step": 2720 }, { "epoch": 0.12, "learning_rate": 1.7521448999046714e-05, "loss": 0.7008, "step": 2730 }, { "epoch": 0.12, "learning_rate": 1.751237005765128e-05, "loss": 0.8043, "step": 2740 }, { "epoch": 0.12, "learning_rate": 1.7503291116255846e-05, "loss": 0.7314, "step": 2750 }, { "epoch": 0.13, "learning_rate": 1.7494212174860413e-05, "loss": 0.6992, "step": 2760 }, { "epoch": 0.13, "learning_rate": 1.748513323346498e-05, "loss": 0.6832, "step": 2770 }, { "epoch": 0.13, "learning_rate": 1.7476054292069545e-05, "loss": 0.7281, "step": 2780 }, { "epoch": 0.13, "learning_rate": 1.7466975350674112e-05, "loss": 0.7684, "step": 2790 }, { "epoch": 0.13, "learning_rate": 1.745789640927868e-05, "loss": 0.693, "step": 2800 }, { "epoch": 0.13, "learning_rate": 1.7448817467883244e-05, "loss": 0.7365, "step": 2810 }, { "epoch": 0.13, "learning_rate": 1.743973852648781e-05, "loss": 0.6813, "step": 2820 }, { "epoch": 0.13, "learning_rate": 1.743065958509238e-05, "loss": 0.6824, "step": 2830 }, { "epoch": 0.13, "learning_rate": 1.7421580643696947e-05, "loss": 0.7217, "step": 2840 }, { "epoch": 0.13, "learning_rate": 1.7412501702301514e-05, "loss": 0.7031, "step": 2850 }, { "epoch": 0.13, "learning_rate": 1.740342276090608e-05, "loss": 0.7348, "step": 2860 }, { "epoch": 0.13, "learning_rate": 1.7394343819510646e-05, "loss": 0.6928, "step": 2870 }, { "epoch": 0.13, "learning_rate": 1.7385264878115214e-05, "loss": 0.7035, "step": 2880 }, { "epoch": 0.13, "learning_rate": 1.737618593671978e-05, "loss": 0.6838, "step": 2890 }, { "epoch": 0.13, "learning_rate": 1.736710699532435e-05, "loss": 0.6586, "step": 2900 }, { "epoch": 0.13, "learning_rate": 1.7358028053928913e-05, "loss": 0.6711, "step": 2910 }, { "epoch": 0.13, "learning_rate": 1.734894911253348e-05, "loss": 0.693, "step": 2920 }, { "epoch": 0.13, "learning_rate": 1.7339870171138048e-05, "loss": 0.701, "step": 2930 }, { "epoch": 0.13, "learning_rate": 1.7330791229742615e-05, "loss": 0.598, "step": 2940 }, { "epoch": 0.13, "learning_rate": 1.732171228834718e-05, "loss": 0.673, "step": 2950 }, { "epoch": 0.13, "learning_rate": 1.7312633346951747e-05, "loss": 0.6502, "step": 2960 }, { "epoch": 0.13, "learning_rate": 1.7303554405556315e-05, "loss": 0.7301, "step": 2970 }, { "epoch": 0.14, "learning_rate": 1.729447546416088e-05, "loss": 0.6961, "step": 2980 }, { "epoch": 0.14, "learning_rate": 1.7285396522765446e-05, "loss": 0.7199, "step": 2990 }, { "epoch": 0.14, "learning_rate": 1.7276317581370014e-05, "loss": 0.6605, "step": 3000 }, { "epoch": 0.14, "eval_accuracy": 0.5806373732496378, "eval_loss": 0.7019612193107605, "eval_runtime": 75.2508, "eval_samples_per_second": 55.043, "eval_steps_per_second": 13.767, "step": 3000 }, { "epoch": 0.14, "learning_rate": 1.7267238639974578e-05, "loss": 0.6375, "step": 3010 }, { "epoch": 0.14, "learning_rate": 1.7258159698579146e-05, "loss": 0.7408, "step": 3020 }, { "epoch": 0.14, "learning_rate": 1.7249080757183713e-05, "loss": 0.6504, "step": 3030 }, { "epoch": 0.14, "learning_rate": 1.724000181578828e-05, "loss": 0.7404, "step": 3040 }, { "epoch": 0.14, "learning_rate": 1.7230922874392848e-05, "loss": 0.7684, "step": 3050 }, { "epoch": 0.14, "learning_rate": 1.7221843932997412e-05, "loss": 0.6043, "step": 3060 }, { "epoch": 0.14, "learning_rate": 1.721276499160198e-05, "loss": 0.6562, "step": 3070 }, { "epoch": 0.14, "learning_rate": 1.7203686050206547e-05, "loss": 0.7309, "step": 3080 }, { "epoch": 0.14, "learning_rate": 1.7194607108811115e-05, "loss": 0.6188, "step": 3090 }, { "epoch": 0.14, "learning_rate": 1.7185528167415683e-05, "loss": 0.767, "step": 3100 }, { "epoch": 0.14, "learning_rate": 1.7176449226020247e-05, "loss": 0.7379, "step": 3110 }, { "epoch": 0.14, "learning_rate": 1.7167370284624814e-05, "loss": 0.732, "step": 3120 }, { "epoch": 0.14, "learning_rate": 1.7158291343229382e-05, "loss": 0.7145, "step": 3130 }, { "epoch": 0.14, "learning_rate": 1.714921240183395e-05, "loss": 0.7035, "step": 3140 }, { "epoch": 0.14, "learning_rate": 1.7140133460438513e-05, "loss": 0.759, "step": 3150 }, { "epoch": 0.14, "learning_rate": 1.713105451904308e-05, "loss": 0.7232, "step": 3160 }, { "epoch": 0.14, "learning_rate": 1.712197557764765e-05, "loss": 0.6621, "step": 3170 }, { "epoch": 0.14, "learning_rate": 1.7112896636252213e-05, "loss": 0.6104, "step": 3180 }, { "epoch": 0.14, "learning_rate": 1.710381769485678e-05, "loss": 0.7688, "step": 3190 }, { "epoch": 0.15, "learning_rate": 1.7094738753461348e-05, "loss": 0.6383, "step": 3200 }, { "epoch": 0.15, "learning_rate": 1.7085659812065915e-05, "loss": 0.6279, "step": 3210 }, { "epoch": 0.15, "learning_rate": 1.707658087067048e-05, "loss": 0.8711, "step": 3220 }, { "epoch": 0.15, "learning_rate": 1.7067501929275047e-05, "loss": 0.7449, "step": 3230 }, { "epoch": 0.15, "learning_rate": 1.7058422987879615e-05, "loss": 0.6668, "step": 3240 }, { "epoch": 0.15, "learning_rate": 1.7049344046484182e-05, "loss": 0.6992, "step": 3250 }, { "epoch": 0.15, "learning_rate": 1.704026510508875e-05, "loss": 0.6529, "step": 3260 }, { "epoch": 0.15, "learning_rate": 1.7031186163693314e-05, "loss": 0.6301, "step": 3270 }, { "epoch": 0.15, "learning_rate": 1.702210722229788e-05, "loss": 0.7, "step": 3280 }, { "epoch": 0.15, "learning_rate": 1.701302828090245e-05, "loss": 0.7004, "step": 3290 }, { "epoch": 0.15, "learning_rate": 1.7003949339507016e-05, "loss": 0.6744, "step": 3300 }, { "epoch": 0.15, "learning_rate": 1.6994870398111584e-05, "loss": 0.6875, "step": 3310 }, { "epoch": 0.15, "learning_rate": 1.6985791456716148e-05, "loss": 0.7172, "step": 3320 }, { "epoch": 0.15, "learning_rate": 1.6976712515320716e-05, "loss": 0.7213, "step": 3330 }, { "epoch": 0.15, "learning_rate": 1.6967633573925283e-05, "loss": 0.6945, "step": 3340 }, { "epoch": 0.15, "learning_rate": 1.6958554632529847e-05, "loss": 0.6578, "step": 3350 }, { "epoch": 0.15, "learning_rate": 1.6949475691134415e-05, "loss": 0.7834, "step": 3360 }, { "epoch": 0.15, "learning_rate": 1.6940396749738982e-05, "loss": 0.6871, "step": 3370 }, { "epoch": 0.15, "learning_rate": 1.6931317808343547e-05, "loss": 0.6965, "step": 3380 }, { "epoch": 0.15, "learning_rate": 1.6922238866948114e-05, "loss": 0.6832, "step": 3390 }, { "epoch": 0.15, "learning_rate": 1.6913159925552682e-05, "loss": 0.6949, "step": 3400 }, { "epoch": 0.15, "learning_rate": 1.690408098415725e-05, "loss": 0.7303, "step": 3410 }, { "epoch": 0.16, "learning_rate": 1.6895002042761813e-05, "loss": 0.7215, "step": 3420 }, { "epoch": 0.16, "learning_rate": 1.688592310136638e-05, "loss": 0.6664, "step": 3430 }, { "epoch": 0.16, "learning_rate": 1.687684415997095e-05, "loss": 0.7107, "step": 3440 }, { "epoch": 0.16, "learning_rate": 1.6867765218575516e-05, "loss": 0.7289, "step": 3450 }, { "epoch": 0.16, "learning_rate": 1.6858686277180084e-05, "loss": 0.6801, "step": 3460 }, { "epoch": 0.16, "learning_rate": 1.6849607335784648e-05, "loss": 0.6475, "step": 3470 }, { "epoch": 0.16, "learning_rate": 1.6840528394389215e-05, "loss": 0.6285, "step": 3480 }, { "epoch": 0.16, "learning_rate": 1.6831449452993783e-05, "loss": 0.6699, "step": 3490 }, { "epoch": 0.16, "learning_rate": 1.682237051159835e-05, "loss": 0.7424, "step": 3500 }, { "epoch": 0.16, "eval_accuracy": 0.5782230806373733, "eval_loss": 0.6958387494087219, "eval_runtime": 74.8034, "eval_samples_per_second": 55.372, "eval_steps_per_second": 13.85, "step": 3500 }, { "epoch": 0.16, "learning_rate": 1.6813291570202918e-05, "loss": 0.676, "step": 3510 }, { "epoch": 0.16, "learning_rate": 1.6804212628807482e-05, "loss": 0.5826, "step": 3520 }, { "epoch": 0.16, "learning_rate": 1.679513368741205e-05, "loss": 0.7164, "step": 3530 }, { "epoch": 0.16, "learning_rate": 1.6786054746016617e-05, "loss": 0.8461, "step": 3540 }, { "epoch": 0.16, "learning_rate": 1.677697580462118e-05, "loss": 0.7219, "step": 3550 }, { "epoch": 0.16, "learning_rate": 1.676789686322575e-05, "loss": 0.7793, "step": 3560 }, { "epoch": 0.16, "learning_rate": 1.6758817921830316e-05, "loss": 0.6773, "step": 3570 }, { "epoch": 0.16, "learning_rate": 1.674973898043488e-05, "loss": 0.6342, "step": 3580 }, { "epoch": 0.16, "learning_rate": 1.6740660039039448e-05, "loss": 0.6426, "step": 3590 }, { "epoch": 0.16, "learning_rate": 1.6731581097644016e-05, "loss": 0.8352, "step": 3600 }, { "epoch": 0.16, "learning_rate": 1.6722502156248583e-05, "loss": 0.673, "step": 3610 }, { "epoch": 0.16, "learning_rate": 1.671342321485315e-05, "loss": 0.6305, "step": 3620 }, { "epoch": 0.16, "learning_rate": 1.6704344273457715e-05, "loss": 0.6246, "step": 3630 }, { "epoch": 0.17, "learning_rate": 1.6695265332062282e-05, "loss": 0.6191, "step": 3640 }, { "epoch": 0.17, "learning_rate": 1.668618639066685e-05, "loss": 0.6887, "step": 3650 }, { "epoch": 0.17, "learning_rate": 1.6677107449271418e-05, "loss": 0.6418, "step": 3660 }, { "epoch": 0.17, "learning_rate": 1.6668028507875985e-05, "loss": 0.727, "step": 3670 }, { "epoch": 0.17, "learning_rate": 1.665894956648055e-05, "loss": 0.6848, "step": 3680 }, { "epoch": 0.17, "learning_rate": 1.6649870625085117e-05, "loss": 0.7105, "step": 3690 }, { "epoch": 0.17, "learning_rate": 1.6640791683689684e-05, "loss": 0.7939, "step": 3700 }, { "epoch": 0.17, "learning_rate": 1.6631712742294252e-05, "loss": 0.6701, "step": 3710 }, { "epoch": 0.17, "learning_rate": 1.6622633800898816e-05, "loss": 0.7344, "step": 3720 }, { "epoch": 0.17, "learning_rate": 1.6613554859503384e-05, "loss": 0.6816, "step": 3730 }, { "epoch": 0.17, "learning_rate": 1.660447591810795e-05, "loss": 0.6516, "step": 3740 }, { "epoch": 0.17, "learning_rate": 1.6595396976712515e-05, "loss": 0.6641, "step": 3750 }, { "epoch": 0.17, "learning_rate": 1.6586318035317083e-05, "loss": 0.6396, "step": 3760 }, { "epoch": 0.17, "learning_rate": 1.657723909392165e-05, "loss": 0.6596, "step": 3770 }, { "epoch": 0.17, "learning_rate": 1.6568160152526214e-05, "loss": 0.6672, "step": 3780 }, { "epoch": 0.17, "learning_rate": 1.6559081211130782e-05, "loss": 0.7094, "step": 3790 }, { "epoch": 0.17, "learning_rate": 1.655000226973535e-05, "loss": 0.7512, "step": 3800 }, { "epoch": 0.17, "learning_rate": 1.6540923328339917e-05, "loss": 0.7082, "step": 3810 }, { "epoch": 0.17, "learning_rate": 1.6531844386944485e-05, "loss": 0.6738, "step": 3820 }, { "epoch": 0.17, "learning_rate": 1.652276544554905e-05, "loss": 0.659, "step": 3830 }, { "epoch": 0.17, "learning_rate": 1.6513686504153616e-05, "loss": 0.725, "step": 3840 }, { "epoch": 0.17, "learning_rate": 1.6504607562758184e-05, "loss": 0.6949, "step": 3850 }, { "epoch": 0.18, "learning_rate": 1.649552862136275e-05, "loss": 0.7621, "step": 3860 }, { "epoch": 0.18, "learning_rate": 1.648644967996732e-05, "loss": 0.5955, "step": 3870 }, { "epoch": 0.18, "learning_rate": 1.6477370738571883e-05, "loss": 0.7156, "step": 3880 }, { "epoch": 0.18, "learning_rate": 1.646829179717645e-05, "loss": 0.7133, "step": 3890 }, { "epoch": 0.18, "learning_rate": 1.6459212855781018e-05, "loss": 0.5943, "step": 3900 }, { "epoch": 0.18, "learning_rate": 1.6450133914385586e-05, "loss": 0.7309, "step": 3910 }, { "epoch": 0.18, "learning_rate": 1.644105497299015e-05, "loss": 0.6729, "step": 3920 }, { "epoch": 0.18, "learning_rate": 1.6431976031594717e-05, "loss": 0.6658, "step": 3930 }, { "epoch": 0.18, "learning_rate": 1.6422897090199285e-05, "loss": 0.592, "step": 3940 }, { "epoch": 0.18, "learning_rate": 1.641381814880385e-05, "loss": 0.6889, "step": 3950 }, { "epoch": 0.18, "learning_rate": 1.6404739207408417e-05, "loss": 0.6473, "step": 3960 }, { "epoch": 0.18, "learning_rate": 1.6395660266012984e-05, "loss": 0.6818, "step": 3970 }, { "epoch": 0.18, "learning_rate": 1.6386581324617552e-05, "loss": 0.6381, "step": 3980 }, { "epoch": 0.18, "learning_rate": 1.6377502383222116e-05, "loss": 0.7305, "step": 3990 }, { "epoch": 0.18, "learning_rate": 1.6368423441826683e-05, "loss": 0.6549, "step": 4000 }, { "epoch": 0.18, "eval_accuracy": 0.5782230806373733, "eval_loss": 0.6909931898117065, "eval_runtime": 72.1497, "eval_samples_per_second": 57.408, "eval_steps_per_second": 14.359, "step": 4000 }, { "epoch": 0.18, "learning_rate": 1.635934450043125e-05, "loss": 0.715, "step": 4010 }, { "epoch": 0.18, "learning_rate": 1.635026555903582e-05, "loss": 0.668, "step": 4020 }, { "epoch": 0.18, "learning_rate": 1.6341186617640386e-05, "loss": 0.6637, "step": 4030 }, { "epoch": 0.18, "learning_rate": 1.633210767624495e-05, "loss": 0.6697, "step": 4040 }, { "epoch": 0.18, "learning_rate": 1.6323028734849518e-05, "loss": 0.5869, "step": 4050 }, { "epoch": 0.18, "learning_rate": 1.6313949793454085e-05, "loss": 0.6004, "step": 4060 }, { "epoch": 0.18, "learning_rate": 1.6304870852058653e-05, "loss": 0.775, "step": 4070 }, { "epoch": 0.19, "learning_rate": 1.629579191066322e-05, "loss": 0.6416, "step": 4080 }, { "epoch": 0.19, "learning_rate": 1.6286712969267785e-05, "loss": 0.641, "step": 4090 }, { "epoch": 0.19, "learning_rate": 1.6277634027872352e-05, "loss": 0.7406, "step": 4100 }, { "epoch": 0.19, "learning_rate": 1.626855508647692e-05, "loss": 0.6365, "step": 4110 }, { "epoch": 0.19, "learning_rate": 1.6259476145081484e-05, "loss": 0.6926, "step": 4120 }, { "epoch": 0.19, "learning_rate": 1.625039720368605e-05, "loss": 0.6426, "step": 4130 }, { "epoch": 0.19, "learning_rate": 1.624131826229062e-05, "loss": 0.7107, "step": 4140 }, { "epoch": 0.19, "learning_rate": 1.6232239320895183e-05, "loss": 0.65, "step": 4150 }, { "epoch": 0.19, "learning_rate": 1.622316037949975e-05, "loss": 0.7195, "step": 4160 }, { "epoch": 0.19, "learning_rate": 1.6214081438104318e-05, "loss": 0.5832, "step": 4170 }, { "epoch": 0.19, "learning_rate": 1.6205002496708886e-05, "loss": 0.7441, "step": 4180 }, { "epoch": 0.19, "learning_rate": 1.619592355531345e-05, "loss": 0.627, "step": 4190 }, { "epoch": 0.19, "learning_rate": 1.6186844613918017e-05, "loss": 0.6598, "step": 4200 }, { "epoch": 0.19, "learning_rate": 1.6177765672522585e-05, "loss": 0.6814, "step": 4210 }, { "epoch": 0.19, "learning_rate": 1.6168686731127152e-05, "loss": 0.7156, "step": 4220 }, { "epoch": 0.19, "learning_rate": 1.615960778973172e-05, "loss": 0.6285, "step": 4230 }, { "epoch": 0.19, "learning_rate": 1.6150528848336284e-05, "loss": 0.7143, "step": 4240 }, { "epoch": 0.19, "learning_rate": 1.6141449906940852e-05, "loss": 0.6299, "step": 4250 }, { "epoch": 0.19, "learning_rate": 1.613237096554542e-05, "loss": 0.6852, "step": 4260 }, { "epoch": 0.19, "learning_rate": 1.6123292024149987e-05, "loss": 0.7059, "step": 4270 }, { "epoch": 0.19, "learning_rate": 1.6114213082754554e-05, "loss": 0.6936, "step": 4280 }, { "epoch": 0.19, "learning_rate": 1.610513414135912e-05, "loss": 0.5746, "step": 4290 }, { "epoch": 0.2, "learning_rate": 1.6096055199963686e-05, "loss": 0.7225, "step": 4300 }, { "epoch": 0.2, "learning_rate": 1.6086976258568254e-05, "loss": 0.6955, "step": 4310 }, { "epoch": 0.2, "learning_rate": 1.6077897317172818e-05, "loss": 0.7551, "step": 4320 }, { "epoch": 0.2, "learning_rate": 1.6068818375777385e-05, "loss": 0.6752, "step": 4330 }, { "epoch": 0.2, "learning_rate": 1.6059739434381953e-05, "loss": 0.7883, "step": 4340 }, { "epoch": 0.2, "learning_rate": 1.6050660492986517e-05, "loss": 0.7078, "step": 4350 }, { "epoch": 0.2, "learning_rate": 1.6041581551591085e-05, "loss": 0.8699, "step": 4360 }, { "epoch": 0.2, "learning_rate": 1.6032502610195652e-05, "loss": 0.6383, "step": 4370 }, { "epoch": 0.2, "learning_rate": 1.602342366880022e-05, "loss": 0.6402, "step": 4380 }, { "epoch": 0.2, "learning_rate": 1.6014344727404787e-05, "loss": 0.6916, "step": 4390 }, { "epoch": 0.2, "learning_rate": 1.600526578600935e-05, "loss": 0.6859, "step": 4400 }, { "epoch": 0.2, "learning_rate": 1.599618684461392e-05, "loss": 0.6668, "step": 4410 }, { "epoch": 0.2, "learning_rate": 1.5987107903218486e-05, "loss": 0.7312, "step": 4420 }, { "epoch": 0.2, "learning_rate": 1.5978028961823054e-05, "loss": 0.6523, "step": 4430 }, { "epoch": 0.2, "learning_rate": 1.596895002042762e-05, "loss": 0.6686, "step": 4440 }, { "epoch": 0.2, "learning_rate": 1.5959871079032186e-05, "loss": 0.7184, "step": 4450 }, { "epoch": 0.2, "learning_rate": 1.5950792137636753e-05, "loss": 0.65, "step": 4460 }, { "epoch": 0.2, "learning_rate": 1.594171319624132e-05, "loss": 0.7178, "step": 4470 }, { "epoch": 0.2, "learning_rate": 1.5932634254845888e-05, "loss": 0.6918, "step": 4480 }, { "epoch": 0.2, "learning_rate": 1.5923555313450452e-05, "loss": 0.5949, "step": 4490 }, { "epoch": 0.2, "learning_rate": 1.591447637205502e-05, "loss": 0.6268, "step": 4500 }, { "epoch": 0.2, "eval_accuracy": 0.5929502655721873, "eval_loss": 0.6875754594802856, "eval_runtime": 74.4346, "eval_samples_per_second": 55.646, "eval_steps_per_second": 13.918, "step": 4500 }, { "epoch": 0.2, "learning_rate": 1.5905397430659588e-05, "loss": 0.6125, "step": 4510 }, { "epoch": 0.21, "learning_rate": 1.589631848926415e-05, "loss": 0.8043, "step": 4520 }, { "epoch": 0.21, "learning_rate": 1.588723954786872e-05, "loss": 0.6482, "step": 4530 }, { "epoch": 0.21, "learning_rate": 1.5878160606473287e-05, "loss": 0.7039, "step": 4540 }, { "epoch": 0.21, "learning_rate": 1.586908166507785e-05, "loss": 0.6203, "step": 4550 }, { "epoch": 0.21, "learning_rate": 1.586000272368242e-05, "loss": 0.6557, "step": 4560 }, { "epoch": 0.21, "learning_rate": 1.5850923782286986e-05, "loss": 0.6105, "step": 4570 }, { "epoch": 0.21, "learning_rate": 1.5841844840891554e-05, "loss": 0.8234, "step": 4580 }, { "epoch": 0.21, "learning_rate": 1.583276589949612e-05, "loss": 0.6199, "step": 4590 }, { "epoch": 0.21, "learning_rate": 1.5823686958100685e-05, "loss": 0.675, "step": 4600 }, { "epoch": 0.21, "learning_rate": 1.5814608016705253e-05, "loss": 0.6316, "step": 4610 }, { "epoch": 0.21, "learning_rate": 1.580552907530982e-05, "loss": 0.7779, "step": 4620 }, { "epoch": 0.21, "learning_rate": 1.5796450133914388e-05, "loss": 0.5781, "step": 4630 }, { "epoch": 0.21, "learning_rate": 1.5787371192518955e-05, "loss": 0.7367, "step": 4640 }, { "epoch": 0.21, "learning_rate": 1.577829225112352e-05, "loss": 0.7154, "step": 4650 }, { "epoch": 0.21, "learning_rate": 1.5769213309728087e-05, "loss": 0.7148, "step": 4660 }, { "epoch": 0.21, "learning_rate": 1.5760134368332655e-05, "loss": 0.6227, "step": 4670 }, { "epoch": 0.21, "learning_rate": 1.5751055426937222e-05, "loss": 0.5682, "step": 4680 }, { "epoch": 0.21, "learning_rate": 1.5741976485541786e-05, "loss": 0.6184, "step": 4690 }, { "epoch": 0.21, "learning_rate": 1.5732897544146354e-05, "loss": 0.6588, "step": 4700 }, { "epoch": 0.21, "learning_rate": 1.572381860275092e-05, "loss": 0.6627, "step": 4710 }, { "epoch": 0.21, "learning_rate": 1.5714739661355486e-05, "loss": 0.6479, "step": 4720 }, { "epoch": 0.21, "learning_rate": 1.5705660719960053e-05, "loss": 0.7141, "step": 4730 }, { "epoch": 0.22, "learning_rate": 1.569658177856462e-05, "loss": 0.6545, "step": 4740 }, { "epoch": 0.22, "learning_rate": 1.5687502837169188e-05, "loss": 0.6789, "step": 4750 }, { "epoch": 0.22, "learning_rate": 1.5678423895773752e-05, "loss": 0.6906, "step": 4760 }, { "epoch": 0.22, "learning_rate": 1.566934495437832e-05, "loss": 0.7527, "step": 4770 }, { "epoch": 0.22, "learning_rate": 1.5660266012982887e-05, "loss": 0.599, "step": 4780 }, { "epoch": 0.22, "learning_rate": 1.5651187071587455e-05, "loss": 0.7096, "step": 4790 }, { "epoch": 0.22, "learning_rate": 1.5642108130192023e-05, "loss": 0.6578, "step": 4800 }, { "epoch": 0.22, "learning_rate": 1.5633029188796587e-05, "loss": 0.6953, "step": 4810 }, { "epoch": 0.22, "learning_rate": 1.5623950247401154e-05, "loss": 0.7516, "step": 4820 }, { "epoch": 0.22, "learning_rate": 1.5614871306005722e-05, "loss": 0.6023, "step": 4830 }, { "epoch": 0.22, "learning_rate": 1.560579236461029e-05, "loss": 0.6684, "step": 4840 }, { "epoch": 0.22, "learning_rate": 1.5596713423214857e-05, "loss": 0.748, "step": 4850 }, { "epoch": 0.22, "learning_rate": 1.558763448181942e-05, "loss": 0.6287, "step": 4860 }, { "epoch": 0.22, "learning_rate": 1.557855554042399e-05, "loss": 0.7781, "step": 4870 }, { "epoch": 0.22, "learning_rate": 1.5569476599028556e-05, "loss": 0.7213, "step": 4880 }, { "epoch": 0.22, "learning_rate": 1.556039765763312e-05, "loss": 0.673, "step": 4890 }, { "epoch": 0.22, "learning_rate": 1.5551318716237688e-05, "loss": 0.8391, "step": 4900 }, { "epoch": 0.22, "learning_rate": 1.5542239774842255e-05, "loss": 0.7537, "step": 4910 }, { "epoch": 0.22, "learning_rate": 1.553316083344682e-05, "loss": 0.6406, "step": 4920 }, { "epoch": 0.22, "learning_rate": 1.5524081892051387e-05, "loss": 0.6082, "step": 4930 }, { "epoch": 0.22, "learning_rate": 1.5515002950655955e-05, "loss": 0.7135, "step": 4940 }, { "epoch": 0.22, "learning_rate": 1.5505924009260522e-05, "loss": 0.6555, "step": 4950 }, { "epoch": 0.23, "learning_rate": 1.5496845067865086e-05, "loss": 0.6604, "step": 4960 }, { "epoch": 0.23, "learning_rate": 1.5487766126469654e-05, "loss": 0.7117, "step": 4970 }, { "epoch": 0.23, "learning_rate": 1.547868718507422e-05, "loss": 0.7215, "step": 4980 }, { "epoch": 0.23, "learning_rate": 1.546960824367879e-05, "loss": 0.6865, "step": 4990 }, { "epoch": 0.23, "learning_rate": 1.5460529302283356e-05, "loss": 0.7523, "step": 5000 }, { "epoch": 0.23, "eval_accuracy": 0.59753742153549, "eval_loss": 0.6818556189537048, "eval_runtime": 74.991, "eval_samples_per_second": 55.233, "eval_steps_per_second": 13.815, "step": 5000 }, { "epoch": 0.23, "learning_rate": 1.545145036088792e-05, "loss": 0.7436, "step": 5010 }, { "epoch": 0.23, "learning_rate": 1.5442371419492488e-05, "loss": 0.7418, "step": 5020 }, { "epoch": 0.23, "learning_rate": 1.5433292478097056e-05, "loss": 0.6656, "step": 5030 }, { "epoch": 0.23, "learning_rate": 1.5424213536701623e-05, "loss": 0.7031, "step": 5040 }, { "epoch": 0.23, "learning_rate": 1.541513459530619e-05, "loss": 0.6496, "step": 5050 }, { "epoch": 0.23, "learning_rate": 1.5406055653910755e-05, "loss": 0.7084, "step": 5060 }, { "epoch": 0.23, "learning_rate": 1.5396976712515323e-05, "loss": 0.6781, "step": 5070 }, { "epoch": 0.23, "learning_rate": 1.538789777111989e-05, "loss": 0.7336, "step": 5080 }, { "epoch": 0.23, "learning_rate": 1.5378818829724454e-05, "loss": 0.6762, "step": 5090 }, { "epoch": 0.23, "learning_rate": 1.5369739888329022e-05, "loss": 0.6391, "step": 5100 }, { "epoch": 0.23, "learning_rate": 1.536066094693359e-05, "loss": 0.7719, "step": 5110 }, { "epoch": 0.23, "learning_rate": 1.5351582005538153e-05, "loss": 0.6418, "step": 5120 }, { "epoch": 0.23, "learning_rate": 1.534250306414272e-05, "loss": 0.6543, "step": 5130 }, { "epoch": 0.23, "learning_rate": 1.533342412274729e-05, "loss": 0.6639, "step": 5140 }, { "epoch": 0.23, "learning_rate": 1.5324345181351856e-05, "loss": 0.6256, "step": 5150 }, { "epoch": 0.23, "learning_rate": 1.5315266239956424e-05, "loss": 0.6721, "step": 5160 }, { "epoch": 0.23, "learning_rate": 1.5306187298560988e-05, "loss": 0.7191, "step": 5170 }, { "epoch": 0.24, "learning_rate": 1.5297108357165555e-05, "loss": 0.7355, "step": 5180 }, { "epoch": 0.24, "learning_rate": 1.5288029415770123e-05, "loss": 0.6605, "step": 5190 }, { "epoch": 0.24, "learning_rate": 1.527895047437469e-05, "loss": 0.6436, "step": 5200 }, { "epoch": 0.24, "learning_rate": 1.5269871532979258e-05, "loss": 0.6848, "step": 5210 }, { "epoch": 0.24, "learning_rate": 1.5260792591583822e-05, "loss": 0.7281, "step": 5220 }, { "epoch": 0.24, "learning_rate": 1.525171365018839e-05, "loss": 0.6137, "step": 5230 }, { "epoch": 0.24, "learning_rate": 1.5242634708792955e-05, "loss": 0.7535, "step": 5240 }, { "epoch": 0.24, "learning_rate": 1.5233555767397523e-05, "loss": 0.7076, "step": 5250 }, { "epoch": 0.24, "learning_rate": 1.522447682600209e-05, "loss": 0.6793, "step": 5260 }, { "epoch": 0.24, "learning_rate": 1.5215397884606655e-05, "loss": 0.7406, "step": 5270 }, { "epoch": 0.24, "learning_rate": 1.5206318943211222e-05, "loss": 0.7, "step": 5280 }, { "epoch": 0.24, "learning_rate": 1.519724000181579e-05, "loss": 0.6547, "step": 5290 }, { "epoch": 0.24, "learning_rate": 1.5188161060420357e-05, "loss": 0.6844, "step": 5300 }, { "epoch": 0.24, "learning_rate": 1.5179082119024923e-05, "loss": 0.6543, "step": 5310 }, { "epoch": 0.24, "learning_rate": 1.5170003177629489e-05, "loss": 0.7021, "step": 5320 }, { "epoch": 0.24, "learning_rate": 1.5160924236234057e-05, "loss": 0.6594, "step": 5330 }, { "epoch": 0.24, "learning_rate": 1.5151845294838622e-05, "loss": 0.7711, "step": 5340 }, { "epoch": 0.24, "learning_rate": 1.514276635344319e-05, "loss": 0.6973, "step": 5350 }, { "epoch": 0.24, "learning_rate": 1.5133687412047758e-05, "loss": 0.6928, "step": 5360 }, { "epoch": 0.24, "learning_rate": 1.5124608470652322e-05, "loss": 0.7063, "step": 5370 }, { "epoch": 0.24, "learning_rate": 1.511552952925689e-05, "loss": 0.6875, "step": 5380 }, { "epoch": 0.24, "learning_rate": 1.5106450587861457e-05, "loss": 0.65, "step": 5390 }, { "epoch": 0.25, "learning_rate": 1.5097371646466024e-05, "loss": 0.6594, "step": 5400 }, { "epoch": 0.25, "learning_rate": 1.508829270507059e-05, "loss": 0.6527, "step": 5410 }, { "epoch": 0.25, "learning_rate": 1.5079213763675156e-05, "loss": 0.6918, "step": 5420 }, { "epoch": 0.25, "learning_rate": 1.5070134822279724e-05, "loss": 0.7523, "step": 5430 }, { "epoch": 0.25, "learning_rate": 1.506105588088429e-05, "loss": 0.6365, "step": 5440 }, { "epoch": 0.25, "learning_rate": 1.5051976939488857e-05, "loss": 0.7805, "step": 5450 }, { "epoch": 0.25, "learning_rate": 1.5042897998093424e-05, "loss": 0.6734, "step": 5460 }, { "epoch": 0.25, "learning_rate": 1.5033819056697992e-05, "loss": 0.748, "step": 5470 }, { "epoch": 0.25, "learning_rate": 1.5024740115302556e-05, "loss": 0.6609, "step": 5480 }, { "epoch": 0.25, "learning_rate": 1.5015661173907124e-05, "loss": 0.6475, "step": 5490 }, { "epoch": 0.25, "learning_rate": 1.5006582232511691e-05, "loss": 0.6582, "step": 5500 }, { "epoch": 0.25, "eval_accuracy": 0.6028488652824723, "eval_loss": 0.6788424849510193, "eval_runtime": 72.2831, "eval_samples_per_second": 57.302, "eval_steps_per_second": 14.333, "step": 5500 }, { "epoch": 0.25, "learning_rate": 1.4997503291116257e-05, "loss": 0.6637, "step": 5510 }, { "epoch": 0.25, "learning_rate": 1.4988424349720825e-05, "loss": 0.6816, "step": 5520 }, { "epoch": 0.25, "learning_rate": 1.497934540832539e-05, "loss": 0.6617, "step": 5530 }, { "epoch": 0.25, "learning_rate": 1.4970266466929956e-05, "loss": 0.6871, "step": 5540 }, { "epoch": 0.25, "learning_rate": 1.4961187525534524e-05, "loss": 0.6793, "step": 5550 }, { "epoch": 0.25, "learning_rate": 1.4952108584139091e-05, "loss": 0.6922, "step": 5560 }, { "epoch": 0.25, "learning_rate": 1.4943029642743659e-05, "loss": 0.5648, "step": 5570 }, { "epoch": 0.25, "learning_rate": 1.4933950701348223e-05, "loss": 0.6439, "step": 5580 }, { "epoch": 0.25, "learning_rate": 1.492487175995279e-05, "loss": 0.7004, "step": 5590 }, { "epoch": 0.25, "learning_rate": 1.4915792818557358e-05, "loss": 0.8055, "step": 5600 }, { "epoch": 0.25, "learning_rate": 1.4906713877161924e-05, "loss": 0.5594, "step": 5610 }, { "epoch": 0.26, "learning_rate": 1.4897634935766492e-05, "loss": 0.8199, "step": 5620 }, { "epoch": 0.26, "learning_rate": 1.4888555994371057e-05, "loss": 0.6451, "step": 5630 }, { "epoch": 0.26, "learning_rate": 1.4879477052975623e-05, "loss": 0.6434, "step": 5640 }, { "epoch": 0.26, "learning_rate": 1.4870398111580191e-05, "loss": 0.6396, "step": 5650 }, { "epoch": 0.26, "learning_rate": 1.4861319170184758e-05, "loss": 0.5916, "step": 5660 }, { "epoch": 0.26, "learning_rate": 1.4852240228789326e-05, "loss": 0.7096, "step": 5670 }, { "epoch": 0.26, "learning_rate": 1.484316128739389e-05, "loss": 0.6758, "step": 5680 }, { "epoch": 0.26, "learning_rate": 1.4834082345998458e-05, "loss": 0.6438, "step": 5690 }, { "epoch": 0.26, "learning_rate": 1.4825003404603025e-05, "loss": 0.6777, "step": 5700 }, { "epoch": 0.26, "learning_rate": 1.4815924463207591e-05, "loss": 0.7172, "step": 5710 }, { "epoch": 0.26, "learning_rate": 1.4806845521812159e-05, "loss": 0.7588, "step": 5720 }, { "epoch": 0.26, "learning_rate": 1.4797766580416724e-05, "loss": 0.651, "step": 5730 }, { "epoch": 0.26, "learning_rate": 1.478868763902129e-05, "loss": 0.6287, "step": 5740 }, { "epoch": 0.26, "learning_rate": 1.4779608697625858e-05, "loss": 0.6875, "step": 5750 }, { "epoch": 0.26, "learning_rate": 1.4770529756230425e-05, "loss": 0.6582, "step": 5760 }, { "epoch": 0.26, "learning_rate": 1.4761450814834993e-05, "loss": 0.6656, "step": 5770 }, { "epoch": 0.26, "learning_rate": 1.4752371873439557e-05, "loss": 0.6037, "step": 5780 }, { "epoch": 0.26, "learning_rate": 1.4743292932044125e-05, "loss": 0.6973, "step": 5790 }, { "epoch": 0.26, "learning_rate": 1.4734213990648692e-05, "loss": 0.6377, "step": 5800 }, { "epoch": 0.26, "learning_rate": 1.4725135049253258e-05, "loss": 0.7617, "step": 5810 }, { "epoch": 0.26, "learning_rate": 1.4716056107857826e-05, "loss": 0.6621, "step": 5820 }, { "epoch": 0.26, "learning_rate": 1.4706977166462391e-05, "loss": 0.6861, "step": 5830 }, { "epoch": 0.27, "learning_rate": 1.4697898225066957e-05, "loss": 0.6504, "step": 5840 }, { "epoch": 0.27, "learning_rate": 1.4688819283671525e-05, "loss": 0.7766, "step": 5850 }, { "epoch": 0.27, "learning_rate": 1.4679740342276092e-05, "loss": 0.6512, "step": 5860 }, { "epoch": 0.27, "learning_rate": 1.467066140088066e-05, "loss": 0.698, "step": 5870 }, { "epoch": 0.27, "learning_rate": 1.4661582459485226e-05, "loss": 0.5723, "step": 5880 }, { "epoch": 0.27, "learning_rate": 1.4652503518089792e-05, "loss": 0.6986, "step": 5890 }, { "epoch": 0.27, "learning_rate": 1.4643424576694359e-05, "loss": 0.6633, "step": 5900 }, { "epoch": 0.27, "learning_rate": 1.4634345635298925e-05, "loss": 0.5979, "step": 5910 }, { "epoch": 0.27, "learning_rate": 1.4625266693903493e-05, "loss": 0.6746, "step": 5920 }, { "epoch": 0.27, "learning_rate": 1.461618775250806e-05, "loss": 0.7547, "step": 5930 }, { "epoch": 0.27, "learning_rate": 1.4607108811112624e-05, "loss": 0.7641, "step": 5940 }, { "epoch": 0.27, "learning_rate": 1.4598029869717192e-05, "loss": 0.6393, "step": 5950 }, { "epoch": 0.27, "learning_rate": 1.458895092832176e-05, "loss": 0.6977, "step": 5960 }, { "epoch": 0.27, "learning_rate": 1.4579871986926327e-05, "loss": 0.6777, "step": 5970 }, { "epoch": 0.27, "learning_rate": 1.4570793045530893e-05, "loss": 0.6963, "step": 5980 }, { "epoch": 0.27, "learning_rate": 1.4561714104135459e-05, "loss": 0.6664, "step": 5990 }, { "epoch": 0.27, "learning_rate": 1.4552635162740026e-05, "loss": 0.5879, "step": 6000 }, { "epoch": 0.27, "eval_accuracy": 0.6057460164171897, "eval_loss": 0.6761283278465271, "eval_runtime": 72.9085, "eval_samples_per_second": 56.811, "eval_steps_per_second": 14.21, "step": 6000 }, { "epoch": 0.27, "learning_rate": 1.4543556221344592e-05, "loss": 0.6695, "step": 6010 }, { "epoch": 0.27, "learning_rate": 1.453447727994916e-05, "loss": 0.5879, "step": 6020 }, { "epoch": 0.27, "learning_rate": 1.4525398338553727e-05, "loss": 0.7484, "step": 6030 }, { "epoch": 0.27, "learning_rate": 1.4516319397158291e-05, "loss": 0.6438, "step": 6040 }, { "epoch": 0.27, "learning_rate": 1.4507240455762859e-05, "loss": 0.6422, "step": 6050 }, { "epoch": 0.28, "learning_rate": 1.4498161514367426e-05, "loss": 0.718, "step": 6060 }, { "epoch": 0.28, "learning_rate": 1.4489082572971994e-05, "loss": 0.6305, "step": 6070 }, { "epoch": 0.28, "learning_rate": 1.448000363157656e-05, "loss": 0.6605, "step": 6080 }, { "epoch": 0.28, "learning_rate": 1.4470924690181126e-05, "loss": 0.6598, "step": 6090 }, { "epoch": 0.28, "learning_rate": 1.4461845748785693e-05, "loss": 0.7359, "step": 6100 }, { "epoch": 0.28, "learning_rate": 1.4452766807390259e-05, "loss": 0.7461, "step": 6110 }, { "epoch": 0.28, "learning_rate": 1.4443687865994826e-05, "loss": 0.5742, "step": 6120 }, { "epoch": 0.28, "learning_rate": 1.4434608924599394e-05, "loss": 0.6814, "step": 6130 }, { "epoch": 0.28, "learning_rate": 1.4425529983203958e-05, "loss": 0.6605, "step": 6140 }, { "epoch": 0.28, "learning_rate": 1.4416451041808526e-05, "loss": 0.6927, "step": 6150 }, { "epoch": 0.28, "learning_rate": 1.4407372100413093e-05, "loss": 0.6391, "step": 6160 }, { "epoch": 0.28, "learning_rate": 1.439829315901766e-05, "loss": 0.6, "step": 6170 }, { "epoch": 0.28, "learning_rate": 1.4389214217622227e-05, "loss": 0.6977, "step": 6180 }, { "epoch": 0.28, "learning_rate": 1.4380135276226792e-05, "loss": 0.6307, "step": 6190 }, { "epoch": 0.28, "learning_rate": 1.437105633483136e-05, "loss": 0.642, "step": 6200 }, { "epoch": 0.28, "learning_rate": 1.4361977393435926e-05, "loss": 0.6969, "step": 6210 }, { "epoch": 0.28, "learning_rate": 1.4352898452040493e-05, "loss": 0.6883, "step": 6220 }, { "epoch": 0.28, "learning_rate": 1.4343819510645061e-05, "loss": 0.6189, "step": 6230 }, { "epoch": 0.28, "learning_rate": 1.4334740569249628e-05, "loss": 0.7242, "step": 6240 }, { "epoch": 0.28, "learning_rate": 1.4325661627854193e-05, "loss": 0.7031, "step": 6250 }, { "epoch": 0.28, "learning_rate": 1.431658268645876e-05, "loss": 0.6645, "step": 6260 }, { "epoch": 0.28, "learning_rate": 1.4307503745063328e-05, "loss": 0.6244, "step": 6270 }, { "epoch": 0.29, "learning_rate": 1.4298424803667894e-05, "loss": 0.6785, "step": 6280 }, { "epoch": 0.29, "learning_rate": 1.4289345862272461e-05, "loss": 0.6316, "step": 6290 }, { "epoch": 0.29, "learning_rate": 1.4280266920877027e-05, "loss": 0.5918, "step": 6300 }, { "epoch": 0.29, "learning_rate": 1.4271187979481593e-05, "loss": 0.6477, "step": 6310 }, { "epoch": 0.29, "learning_rate": 1.426210903808616e-05, "loss": 0.6242, "step": 6320 }, { "epoch": 0.29, "learning_rate": 1.4253030096690728e-05, "loss": 0.7172, "step": 6330 }, { "epoch": 0.29, "learning_rate": 1.4243951155295295e-05, "loss": 0.5865, "step": 6340 }, { "epoch": 0.29, "learning_rate": 1.423487221389986e-05, "loss": 0.5662, "step": 6350 }, { "epoch": 0.29, "learning_rate": 1.4225793272504427e-05, "loss": 0.7059, "step": 6360 }, { "epoch": 0.29, "learning_rate": 1.4216714331108995e-05, "loss": 0.6787, "step": 6370 }, { "epoch": 0.29, "learning_rate": 1.420763538971356e-05, "loss": 0.7768, "step": 6380 }, { "epoch": 0.29, "learning_rate": 1.4198556448318128e-05, "loss": 0.6779, "step": 6390 }, { "epoch": 0.29, "learning_rate": 1.4189477506922694e-05, "loss": 0.7148, "step": 6400 }, { "epoch": 0.29, "learning_rate": 1.418039856552726e-05, "loss": 0.6711, "step": 6410 }, { "epoch": 0.29, "learning_rate": 1.4171319624131827e-05, "loss": 0.5986, "step": 6420 }, { "epoch": 0.29, "learning_rate": 1.4162240682736395e-05, "loss": 0.7102, "step": 6430 }, { "epoch": 0.29, "learning_rate": 1.4153161741340962e-05, "loss": 0.6748, "step": 6440 }, { "epoch": 0.29, "learning_rate": 1.4144082799945527e-05, "loss": 0.7035, "step": 6450 }, { "epoch": 0.29, "learning_rate": 1.4135003858550094e-05, "loss": 0.6783, "step": 6460 }, { "epoch": 0.29, "learning_rate": 1.4125924917154662e-05, "loss": 0.64, "step": 6470 }, { "epoch": 0.29, "learning_rate": 1.4116845975759228e-05, "loss": 0.5697, "step": 6480 }, { "epoch": 0.29, "learning_rate": 1.4107767034363795e-05, "loss": 0.6951, "step": 6490 }, { "epoch": 0.3, "learning_rate": 1.4098688092968361e-05, "loss": 0.6676, "step": 6500 }, { "epoch": 0.3, "eval_accuracy": 0.6064703042008691, "eval_loss": 0.6781559586524963, "eval_runtime": 74.4693, "eval_samples_per_second": 55.62, "eval_steps_per_second": 13.912, "step": 6500 }, { "epoch": 0.3, "learning_rate": 1.4089609151572927e-05, "loss": 0.7078, "step": 6510 }, { "epoch": 0.3, "learning_rate": 1.4080530210177494e-05, "loss": 0.7605, "step": 6520 }, { "epoch": 0.3, "learning_rate": 1.4071451268782062e-05, "loss": 0.7063, "step": 6530 }, { "epoch": 0.3, "learning_rate": 1.406237232738663e-05, "loss": 0.6715, "step": 6540 }, { "epoch": 0.3, "learning_rate": 1.4053293385991194e-05, "loss": 0.7559, "step": 6550 }, { "epoch": 0.3, "learning_rate": 1.4044214444595761e-05, "loss": 0.6426, "step": 6560 }, { "epoch": 0.3, "learning_rate": 1.4035135503200329e-05, "loss": 0.6973, "step": 6570 }, { "epoch": 0.3, "learning_rate": 1.4026056561804894e-05, "loss": 0.6502, "step": 6580 }, { "epoch": 0.3, "learning_rate": 1.4016977620409462e-05, "loss": 0.6734, "step": 6590 }, { "epoch": 0.3, "learning_rate": 1.4007898679014026e-05, "loss": 0.6262, "step": 6600 }, { "epoch": 0.3, "learning_rate": 1.3998819737618594e-05, "loss": 0.7363, "step": 6610 }, { "epoch": 0.3, "learning_rate": 1.3989740796223161e-05, "loss": 0.6805, "step": 6620 }, { "epoch": 0.3, "learning_rate": 1.3980661854827729e-05, "loss": 0.6609, "step": 6630 }, { "epoch": 0.3, "learning_rate": 1.3971582913432296e-05, "loss": 0.6365, "step": 6640 }, { "epoch": 0.3, "learning_rate": 1.3962503972036862e-05, "loss": 0.6311, "step": 6650 }, { "epoch": 0.3, "learning_rate": 1.3953425030641428e-05, "loss": 0.7283, "step": 6660 }, { "epoch": 0.3, "learning_rate": 1.3944346089245996e-05, "loss": 0.6016, "step": 6670 }, { "epoch": 0.3, "learning_rate": 1.3935267147850561e-05, "loss": 0.7059, "step": 6680 }, { "epoch": 0.3, "learning_rate": 1.3926188206455129e-05, "loss": 0.5814, "step": 6690 }, { "epoch": 0.3, "learning_rate": 1.3917109265059697e-05, "loss": 0.6375, "step": 6700 }, { "epoch": 0.3, "learning_rate": 1.390803032366426e-05, "loss": 0.7088, "step": 6710 }, { "epoch": 0.31, "learning_rate": 1.3898951382268828e-05, "loss": 0.6859, "step": 6720 }, { "epoch": 0.31, "learning_rate": 1.3889872440873396e-05, "loss": 0.7002, "step": 6730 }, { "epoch": 0.31, "learning_rate": 1.3880793499477963e-05, "loss": 0.7227, "step": 6740 }, { "epoch": 0.31, "learning_rate": 1.3871714558082529e-05, "loss": 0.6623, "step": 6750 }, { "epoch": 0.31, "learning_rate": 1.3862635616687095e-05, "loss": 0.685, "step": 6760 }, { "epoch": 0.31, "learning_rate": 1.3853556675291663e-05, "loss": 0.6824, "step": 6770 }, { "epoch": 0.31, "learning_rate": 1.3844477733896228e-05, "loss": 0.6699, "step": 6780 }, { "epoch": 0.31, "learning_rate": 1.3835398792500796e-05, "loss": 0.7406, "step": 6790 }, { "epoch": 0.31, "learning_rate": 1.3826319851105363e-05, "loss": 0.6277, "step": 6800 }, { "epoch": 0.31, "learning_rate": 1.3817240909709928e-05, "loss": 0.7102, "step": 6810 }, { "epoch": 0.31, "learning_rate": 1.3808161968314495e-05, "loss": 0.6322, "step": 6820 }, { "epoch": 0.31, "learning_rate": 1.3799083026919063e-05, "loss": 0.7041, "step": 6830 }, { "epoch": 0.31, "learning_rate": 1.379000408552363e-05, "loss": 0.6141, "step": 6840 }, { "epoch": 0.31, "learning_rate": 1.3780925144128196e-05, "loss": 0.6508, "step": 6850 }, { "epoch": 0.31, "learning_rate": 1.3771846202732762e-05, "loss": 0.7113, "step": 6860 }, { "epoch": 0.31, "learning_rate": 1.376276726133733e-05, "loss": 0.7309, "step": 6870 }, { "epoch": 0.31, "learning_rate": 1.3753688319941895e-05, "loss": 0.6883, "step": 6880 }, { "epoch": 0.31, "learning_rate": 1.3744609378546463e-05, "loss": 0.7211, "step": 6890 }, { "epoch": 0.31, "learning_rate": 1.373553043715103e-05, "loss": 0.7285, "step": 6900 }, { "epoch": 0.31, "learning_rate": 1.3726451495755595e-05, "loss": 0.7053, "step": 6910 }, { "epoch": 0.31, "learning_rate": 1.3717372554360162e-05, "loss": 0.652, "step": 6920 }, { "epoch": 0.31, "learning_rate": 1.370829361296473e-05, "loss": 0.6754, "step": 6930 }, { "epoch": 0.32, "learning_rate": 1.3699214671569297e-05, "loss": 0.7719, "step": 6940 }, { "epoch": 0.32, "learning_rate": 1.3690135730173863e-05, "loss": 0.6084, "step": 6950 }, { "epoch": 0.32, "learning_rate": 1.3681056788778429e-05, "loss": 0.6129, "step": 6960 }, { "epoch": 0.32, "learning_rate": 1.3671977847382995e-05, "loss": 0.6258, "step": 6970 }, { "epoch": 0.32, "learning_rate": 1.3662898905987562e-05, "loss": 0.6885, "step": 6980 }, { "epoch": 0.32, "learning_rate": 1.365381996459213e-05, "loss": 0.6445, "step": 6990 }, { "epoch": 0.32, "learning_rate": 1.3644741023196697e-05, "loss": 0.7045, "step": 7000 }, { "epoch": 0.32, "eval_accuracy": 0.6038145823273781, "eval_loss": 0.6830967664718628, "eval_runtime": 72.7514, "eval_samples_per_second": 56.934, "eval_steps_per_second": 14.24, "step": 7000 }, { "epoch": 0.32, "learning_rate": 1.3635662081801265e-05, "loss": 0.6984, "step": 7010 }, { "epoch": 0.32, "learning_rate": 1.3626583140405829e-05, "loss": 0.6678, "step": 7020 }, { "epoch": 0.32, "learning_rate": 1.3617504199010397e-05, "loss": 0.7355, "step": 7030 }, { "epoch": 0.32, "learning_rate": 1.3608425257614964e-05, "loss": 0.6607, "step": 7040 }, { "epoch": 0.32, "learning_rate": 1.359934631621953e-05, "loss": 0.6746, "step": 7050 }, { "epoch": 0.32, "learning_rate": 1.3590267374824098e-05, "loss": 0.5451, "step": 7060 }, { "epoch": 0.32, "learning_rate": 1.3581188433428662e-05, "loss": 0.6838, "step": 7070 }, { "epoch": 0.32, "learning_rate": 1.357210949203323e-05, "loss": 0.6322, "step": 7080 }, { "epoch": 0.32, "learning_rate": 1.3563030550637797e-05, "loss": 0.5895, "step": 7090 }, { "epoch": 0.32, "learning_rate": 1.3553951609242364e-05, "loss": 0.6818, "step": 7100 }, { "epoch": 0.32, "learning_rate": 1.3544872667846932e-05, "loss": 0.777, "step": 7110 }, { "epoch": 0.32, "learning_rate": 1.3535793726451496e-05, "loss": 0.7098, "step": 7120 }, { "epoch": 0.32, "learning_rate": 1.3526714785056064e-05, "loss": 0.7652, "step": 7130 }, { "epoch": 0.32, "learning_rate": 1.3517635843660631e-05, "loss": 0.6576, "step": 7140 }, { "epoch": 0.32, "learning_rate": 1.3508556902265197e-05, "loss": 0.6236, "step": 7150 }, { "epoch": 0.33, "learning_rate": 1.3499477960869765e-05, "loss": 0.6613, "step": 7160 }, { "epoch": 0.33, "learning_rate": 1.3490399019474329e-05, "loss": 0.6992, "step": 7170 }, { "epoch": 0.33, "learning_rate": 1.3481320078078896e-05, "loss": 0.5818, "step": 7180 }, { "epoch": 0.33, "learning_rate": 1.3472241136683464e-05, "loss": 0.6623, "step": 7190 }, { "epoch": 0.33, "learning_rate": 1.3463162195288031e-05, "loss": 0.6654, "step": 7200 }, { "epoch": 0.33, "learning_rate": 1.3454083253892599e-05, "loss": 0.7695, "step": 7210 }, { "epoch": 0.33, "learning_rate": 1.3445004312497163e-05, "loss": 0.6055, "step": 7220 }, { "epoch": 0.33, "learning_rate": 1.343592537110173e-05, "loss": 0.7094, "step": 7230 }, { "epoch": 0.33, "learning_rate": 1.3426846429706298e-05, "loss": 0.684, "step": 7240 }, { "epoch": 0.33, "learning_rate": 1.3417767488310864e-05, "loss": 0.7111, "step": 7250 }, { "epoch": 0.33, "learning_rate": 1.3408688546915431e-05, "loss": 0.7848, "step": 7260 }, { "epoch": 0.33, "learning_rate": 1.3399609605519996e-05, "loss": 0.7582, "step": 7270 }, { "epoch": 0.33, "learning_rate": 1.3390530664124563e-05, "loss": 0.7223, "step": 7280 }, { "epoch": 0.33, "learning_rate": 1.338145172272913e-05, "loss": 0.6324, "step": 7290 }, { "epoch": 0.33, "learning_rate": 1.3372372781333698e-05, "loss": 0.673, "step": 7300 }, { "epoch": 0.33, "learning_rate": 1.3363293839938266e-05, "loss": 0.6451, "step": 7310 }, { "epoch": 0.33, "learning_rate": 1.335421489854283e-05, "loss": 0.6449, "step": 7320 }, { "epoch": 0.33, "learning_rate": 1.3345135957147398e-05, "loss": 0.7188, "step": 7330 }, { "epoch": 0.33, "learning_rate": 1.3336057015751963e-05, "loss": 0.7484, "step": 7340 }, { "epoch": 0.33, "learning_rate": 1.3326978074356531e-05, "loss": 0.6664, "step": 7350 }, { "epoch": 0.33, "learning_rate": 1.3317899132961098e-05, "loss": 0.7406, "step": 7360 }, { "epoch": 0.33, "learning_rate": 1.3308820191565666e-05, "loss": 0.7078, "step": 7370 }, { "epoch": 0.34, "learning_rate": 1.329974125017023e-05, "loss": 0.5885, "step": 7380 }, { "epoch": 0.34, "learning_rate": 1.3290662308774798e-05, "loss": 0.7395, "step": 7390 }, { "epoch": 0.34, "learning_rate": 1.3281583367379365e-05, "loss": 0.6184, "step": 7400 }, { "epoch": 0.34, "learning_rate": 1.3272504425983933e-05, "loss": 0.7012, "step": 7410 }, { "epoch": 0.34, "learning_rate": 1.3263425484588499e-05, "loss": 0.7332, "step": 7420 }, { "epoch": 0.34, "learning_rate": 1.3254346543193064e-05, "loss": 0.5527, "step": 7430 }, { "epoch": 0.34, "learning_rate": 1.324526760179763e-05, "loss": 0.6139, "step": 7440 }, { "epoch": 0.34, "learning_rate": 1.3236188660402198e-05, "loss": 0.6439, "step": 7450 }, { "epoch": 0.34, "learning_rate": 1.3227109719006765e-05, "loss": 0.6129, "step": 7460 }, { "epoch": 0.34, "learning_rate": 1.3218030777611333e-05, "loss": 0.6891, "step": 7470 }, { "epoch": 0.34, "learning_rate": 1.3208951836215897e-05, "loss": 0.7117, "step": 7480 }, { "epoch": 0.34, "learning_rate": 1.3199872894820465e-05, "loss": 0.7484, "step": 7490 }, { "epoch": 0.34, "learning_rate": 1.3190793953425032e-05, "loss": 0.6268, "step": 7500 }, { "epoch": 0.34, "eval_accuracy": 0.6071945919845485, "eval_loss": 0.6775137186050415, "eval_runtime": 73.7281, "eval_samples_per_second": 56.179, "eval_steps_per_second": 14.052, "step": 7500 }, { "epoch": 0.34, "learning_rate": 1.31817150120296e-05, "loss": 0.7041, "step": 7510 }, { "epoch": 0.34, "learning_rate": 1.3172636070634166e-05, "loss": 0.7602, "step": 7520 }, { "epoch": 0.34, "learning_rate": 1.3163557129238731e-05, "loss": 0.6477, "step": 7530 }, { "epoch": 0.34, "learning_rate": 1.3154478187843297e-05, "loss": 0.6014, "step": 7540 }, { "epoch": 0.34, "learning_rate": 1.3145399246447865e-05, "loss": 0.6334, "step": 7550 }, { "epoch": 0.34, "learning_rate": 1.3136320305052432e-05, "loss": 0.7918, "step": 7560 }, { "epoch": 0.34, "learning_rate": 1.3127241363657e-05, "loss": 0.6686, "step": 7570 }, { "epoch": 0.34, "learning_rate": 1.3118162422261564e-05, "loss": 0.6744, "step": 7580 }, { "epoch": 0.34, "learning_rate": 1.3109083480866132e-05, "loss": 0.6256, "step": 7590 }, { "epoch": 0.34, "learning_rate": 1.31000045394707e-05, "loss": 0.675, "step": 7600 }, { "epoch": 0.35, "learning_rate": 1.3090925598075267e-05, "loss": 0.7121, "step": 7610 }, { "epoch": 0.35, "learning_rate": 1.3081846656679833e-05, "loss": 0.6578, "step": 7620 }, { "epoch": 0.35, "learning_rate": 1.3072767715284398e-05, "loss": 0.6576, "step": 7630 }, { "epoch": 0.35, "learning_rate": 1.3063688773888964e-05, "loss": 0.6482, "step": 7640 }, { "epoch": 0.35, "learning_rate": 1.3054609832493532e-05, "loss": 0.5342, "step": 7650 }, { "epoch": 0.35, "learning_rate": 1.30455308910981e-05, "loss": 0.6906, "step": 7660 }, { "epoch": 0.35, "learning_rate": 1.3036451949702667e-05, "loss": 0.6277, "step": 7670 }, { "epoch": 0.35, "learning_rate": 1.3027373008307231e-05, "loss": 0.5578, "step": 7680 }, { "epoch": 0.35, "learning_rate": 1.3018294066911799e-05, "loss": 0.5428, "step": 7690 }, { "epoch": 0.35, "learning_rate": 1.3009215125516366e-05, "loss": 0.6779, "step": 7700 }, { "epoch": 0.35, "learning_rate": 1.3000136184120932e-05, "loss": 0.7418, "step": 7710 }, { "epoch": 0.35, "learning_rate": 1.29910572427255e-05, "loss": 0.6016, "step": 7720 }, { "epoch": 0.35, "learning_rate": 1.2981978301330065e-05, "loss": 0.7863, "step": 7730 }, { "epoch": 0.35, "learning_rate": 1.2972899359934631e-05, "loss": 0.6191, "step": 7740 }, { "epoch": 0.35, "learning_rate": 1.2963820418539199e-05, "loss": 0.6687, "step": 7750 }, { "epoch": 0.35, "learning_rate": 1.2954741477143766e-05, "loss": 0.6523, "step": 7760 }, { "epoch": 0.35, "learning_rate": 1.2945662535748334e-05, "loss": 0.7125, "step": 7770 }, { "epoch": 0.35, "learning_rate": 1.2936583594352901e-05, "loss": 0.5959, "step": 7780 }, { "epoch": 0.35, "learning_rate": 1.2927504652957466e-05, "loss": 0.6693, "step": 7790 }, { "epoch": 0.35, "learning_rate": 1.2918425711562033e-05, "loss": 0.6715, "step": 7800 }, { "epoch": 0.35, "learning_rate": 1.2909346770166599e-05, "loss": 0.7223, "step": 7810 }, { "epoch": 0.35, "learning_rate": 1.2900267828771166e-05, "loss": 0.6242, "step": 7820 }, { "epoch": 0.36, "learning_rate": 1.2891188887375734e-05, "loss": 0.7363, "step": 7830 }, { "epoch": 0.36, "learning_rate": 1.2882109945980298e-05, "loss": 0.6469, "step": 7840 }, { "epoch": 0.36, "learning_rate": 1.2873031004584866e-05, "loss": 0.632, "step": 7850 }, { "epoch": 0.36, "learning_rate": 1.2863952063189433e-05, "loss": 0.69, "step": 7860 }, { "epoch": 0.36, "learning_rate": 1.2854873121794e-05, "loss": 0.7389, "step": 7870 }, { "epoch": 0.36, "learning_rate": 1.2845794180398568e-05, "loss": 0.693, "step": 7880 }, { "epoch": 0.36, "learning_rate": 1.2836715239003133e-05, "loss": 0.6895, "step": 7890 }, { "epoch": 0.36, "learning_rate": 1.28276362976077e-05, "loss": 0.6652, "step": 7900 }, { "epoch": 0.36, "learning_rate": 1.2818557356212266e-05, "loss": 0.6574, "step": 7910 }, { "epoch": 0.36, "learning_rate": 1.2809478414816833e-05, "loss": 0.7434, "step": 7920 }, { "epoch": 0.36, "learning_rate": 1.2800399473421401e-05, "loss": 0.6785, "step": 7930 }, { "epoch": 0.36, "learning_rate": 1.2791320532025965e-05, "loss": 0.6727, "step": 7940 }, { "epoch": 0.36, "learning_rate": 1.2782241590630533e-05, "loss": 0.6723, "step": 7950 }, { "epoch": 0.36, "learning_rate": 1.27731626492351e-05, "loss": 0.7227, "step": 7960 }, { "epoch": 0.36, "learning_rate": 1.2764083707839668e-05, "loss": 0.6693, "step": 7970 }, { "epoch": 0.36, "learning_rate": 1.2755004766444235e-05, "loss": 0.5281, "step": 7980 }, { "epoch": 0.36, "learning_rate": 1.27459258250488e-05, "loss": 0.7488, "step": 7990 }, { "epoch": 0.36, "learning_rate": 1.2736846883653367e-05, "loss": 0.5973, "step": 8000 }, { "epoch": 0.36, "eval_accuracy": 0.6120231772090777, "eval_loss": 0.6748174428939819, "eval_runtime": 73.8533, "eval_samples_per_second": 56.084, "eval_steps_per_second": 14.028, "step": 8000 }, { "epoch": 0.36, "learning_rate": 1.2727767942257933e-05, "loss": 0.6354, "step": 8010 }, { "epoch": 0.36, "learning_rate": 1.27186890008625e-05, "loss": 0.6363, "step": 8020 }, { "epoch": 0.36, "learning_rate": 1.2709610059467068e-05, "loss": 0.7402, "step": 8030 }, { "epoch": 0.36, "learning_rate": 1.2700531118071632e-05, "loss": 0.675, "step": 8040 }, { "epoch": 0.37, "learning_rate": 1.26914521766762e-05, "loss": 0.7207, "step": 8050 }, { "epoch": 0.37, "learning_rate": 1.2682373235280767e-05, "loss": 0.6342, "step": 8060 }, { "epoch": 0.37, "learning_rate": 1.2673294293885335e-05, "loss": 0.6756, "step": 8070 }, { "epoch": 0.37, "learning_rate": 1.26642153524899e-05, "loss": 0.7234, "step": 8080 }, { "epoch": 0.37, "learning_rate": 1.2655136411094466e-05, "loss": 0.6652, "step": 8090 }, { "epoch": 0.37, "learning_rate": 1.2646057469699034e-05, "loss": 0.6008, "step": 8100 }, { "epoch": 0.37, "learning_rate": 1.26369785283036e-05, "loss": 0.7371, "step": 8110 }, { "epoch": 0.37, "learning_rate": 1.2627899586908167e-05, "loss": 0.6217, "step": 8120 }, { "epoch": 0.37, "learning_rate": 1.2618820645512735e-05, "loss": 0.6926, "step": 8130 }, { "epoch": 0.37, "learning_rate": 1.2609741704117302e-05, "loss": 0.6658, "step": 8140 }, { "epoch": 0.37, "learning_rate": 1.2600662762721867e-05, "loss": 0.6504, "step": 8150 }, { "epoch": 0.37, "learning_rate": 1.2591583821326434e-05, "loss": 0.8324, "step": 8160 }, { "epoch": 0.37, "learning_rate": 1.2582504879931002e-05, "loss": 0.6648, "step": 8170 }, { "epoch": 0.37, "learning_rate": 1.2573425938535568e-05, "loss": 0.6914, "step": 8180 }, { "epoch": 0.37, "learning_rate": 1.2564346997140135e-05, "loss": 0.6416, "step": 8190 }, { "epoch": 0.37, "learning_rate": 1.2555268055744701e-05, "loss": 0.5684, "step": 8200 }, { "epoch": 0.37, "learning_rate": 1.2546189114349267e-05, "loss": 0.6676, "step": 8210 }, { "epoch": 0.37, "learning_rate": 1.2537110172953834e-05, "loss": 0.6582, "step": 8220 }, { "epoch": 0.37, "learning_rate": 1.2528031231558402e-05, "loss": 0.7184, "step": 8230 }, { "epoch": 0.37, "learning_rate": 1.251895229016297e-05, "loss": 0.6281, "step": 8240 }, { "epoch": 0.37, "learning_rate": 1.2509873348767534e-05, "loss": 0.7473, "step": 8250 }, { "epoch": 0.37, "learning_rate": 1.2500794407372101e-05, "loss": 0.6387, "step": 8260 }, { "epoch": 0.38, "learning_rate": 1.2491715465976669e-05, "loss": 0.7762, "step": 8270 }, { "epoch": 0.38, "learning_rate": 1.2482636524581235e-05, "loss": 0.6533, "step": 8280 }, { "epoch": 0.38, "learning_rate": 1.2473557583185802e-05, "loss": 0.7139, "step": 8290 }, { "epoch": 0.38, "learning_rate": 1.2464478641790368e-05, "loss": 0.6283, "step": 8300 }, { "epoch": 0.38, "learning_rate": 1.2455399700394934e-05, "loss": 0.5828, "step": 8310 }, { "epoch": 0.38, "learning_rate": 1.2446320758999501e-05, "loss": 0.6191, "step": 8320 }, { "epoch": 0.38, "learning_rate": 1.2437241817604069e-05, "loss": 0.6521, "step": 8330 }, { "epoch": 0.38, "learning_rate": 1.2428162876208636e-05, "loss": 0.7365, "step": 8340 }, { "epoch": 0.38, "learning_rate": 1.24190839348132e-05, "loss": 0.6521, "step": 8350 }, { "epoch": 0.38, "learning_rate": 1.2410004993417768e-05, "loss": 0.7143, "step": 8360 }, { "epoch": 0.38, "learning_rate": 1.2400926052022336e-05, "loss": 0.5857, "step": 8370 }, { "epoch": 0.38, "learning_rate": 1.2391847110626901e-05, "loss": 0.6746, "step": 8380 }, { "epoch": 0.38, "learning_rate": 1.2382768169231469e-05, "loss": 0.7582, "step": 8390 }, { "epoch": 0.38, "learning_rate": 1.2373689227836035e-05, "loss": 0.6582, "step": 8400 }, { "epoch": 0.38, "learning_rate": 1.23646102864406e-05, "loss": 0.6844, "step": 8410 }, { "epoch": 0.38, "learning_rate": 1.2355531345045168e-05, "loss": 0.6281, "step": 8420 }, { "epoch": 0.38, "learning_rate": 1.2346452403649736e-05, "loss": 0.6521, "step": 8430 }, { "epoch": 0.38, "learning_rate": 1.2337373462254303e-05, "loss": 0.6479, "step": 8440 }, { "epoch": 0.38, "learning_rate": 1.2328294520858867e-05, "loss": 0.7461, "step": 8450 }, { "epoch": 0.38, "learning_rate": 1.2319215579463435e-05, "loss": 0.6861, "step": 8460 }, { "epoch": 0.38, "learning_rate": 1.2310136638068003e-05, "loss": 0.6818, "step": 8470 }, { "epoch": 0.38, "learning_rate": 1.2301057696672568e-05, "loss": 0.6973, "step": 8480 }, { "epoch": 0.39, "learning_rate": 1.2291978755277136e-05, "loss": 0.6914, "step": 8490 }, { "epoch": 0.39, "learning_rate": 1.2282899813881704e-05, "loss": 0.6592, "step": 8500 }, { "epoch": 0.39, "eval_accuracy": 0.613713182037663, "eval_loss": 0.6704452633857727, "eval_runtime": 73.8233, "eval_samples_per_second": 56.107, "eval_steps_per_second": 14.034, "step": 8500 }, { "epoch": 0.39, "learning_rate": 1.2273820872486268e-05, "loss": 0.6002, "step": 8510 }, { "epoch": 0.39, "learning_rate": 1.2264741931090835e-05, "loss": 0.5867, "step": 8520 }, { "epoch": 0.39, "learning_rate": 1.2255662989695403e-05, "loss": 0.6746, "step": 8530 }, { "epoch": 0.39, "learning_rate": 1.224658404829997e-05, "loss": 0.7316, "step": 8540 }, { "epoch": 0.39, "learning_rate": 1.2237505106904536e-05, "loss": 0.7844, "step": 8550 }, { "epoch": 0.39, "learning_rate": 1.2228426165509102e-05, "loss": 0.7021, "step": 8560 }, { "epoch": 0.39, "learning_rate": 1.221934722411367e-05, "loss": 0.6369, "step": 8570 }, { "epoch": 0.39, "learning_rate": 1.2210268282718235e-05, "loss": 0.6469, "step": 8580 }, { "epoch": 0.39, "learning_rate": 1.2201189341322803e-05, "loss": 0.7189, "step": 8590 }, { "epoch": 0.39, "learning_rate": 1.219211039992737e-05, "loss": 0.668, "step": 8600 }, { "epoch": 0.39, "learning_rate": 1.2183031458531935e-05, "loss": 0.6777, "step": 8610 }, { "epoch": 0.39, "learning_rate": 1.2173952517136502e-05, "loss": 0.5765, "step": 8620 }, { "epoch": 0.39, "learning_rate": 1.216487357574107e-05, "loss": 0.6963, "step": 8630 }, { "epoch": 0.39, "learning_rate": 1.2155794634345637e-05, "loss": 0.6502, "step": 8640 }, { "epoch": 0.39, "learning_rate": 1.2146715692950203e-05, "loss": 0.7574, "step": 8650 }, { "epoch": 0.39, "learning_rate": 1.2137636751554769e-05, "loss": 0.7723, "step": 8660 }, { "epoch": 0.39, "learning_rate": 1.2128557810159336e-05, "loss": 0.6846, "step": 8670 }, { "epoch": 0.39, "learning_rate": 1.2119478868763902e-05, "loss": 0.6258, "step": 8680 }, { "epoch": 0.39, "learning_rate": 1.211039992736847e-05, "loss": 0.6141, "step": 8690 }, { "epoch": 0.39, "learning_rate": 1.2101320985973037e-05, "loss": 0.576, "step": 8700 }, { "epoch": 0.4, "learning_rate": 1.2092242044577602e-05, "loss": 0.7633, "step": 8710 }, { "epoch": 0.4, "learning_rate": 1.2083163103182169e-05, "loss": 0.6477, "step": 8720 }, { "epoch": 0.4, "learning_rate": 1.2074084161786737e-05, "loss": 0.7104, "step": 8730 }, { "epoch": 0.4, "learning_rate": 1.2065005220391304e-05, "loss": 0.673, "step": 8740 }, { "epoch": 0.4, "learning_rate": 1.205592627899587e-05, "loss": 0.6516, "step": 8750 }, { "epoch": 0.4, "learning_rate": 1.2046847337600436e-05, "loss": 0.7199, "step": 8760 }, { "epoch": 0.4, "learning_rate": 1.2037768396205003e-05, "loss": 0.7305, "step": 8770 }, { "epoch": 0.4, "learning_rate": 1.202868945480957e-05, "loss": 0.6078, "step": 8780 }, { "epoch": 0.4, "learning_rate": 1.2019610513414137e-05, "loss": 0.6326, "step": 8790 }, { "epoch": 0.4, "learning_rate": 1.2010531572018704e-05, "loss": 0.6406, "step": 8800 }, { "epoch": 0.4, "learning_rate": 1.2001452630623269e-05, "loss": 0.7246, "step": 8810 }, { "epoch": 0.4, "learning_rate": 1.1992373689227836e-05, "loss": 0.7004, "step": 8820 }, { "epoch": 0.4, "learning_rate": 1.1983294747832404e-05, "loss": 0.6844, "step": 8830 }, { "epoch": 0.4, "learning_rate": 1.1974215806436971e-05, "loss": 0.7195, "step": 8840 }, { "epoch": 0.4, "learning_rate": 1.1965136865041537e-05, "loss": 0.6477, "step": 8850 }, { "epoch": 0.4, "learning_rate": 1.1956057923646103e-05, "loss": 0.7027, "step": 8860 }, { "epoch": 0.4, "learning_rate": 1.194697898225067e-05, "loss": 0.6791, "step": 8870 }, { "epoch": 0.4, "learning_rate": 1.1937900040855236e-05, "loss": 0.5969, "step": 8880 }, { "epoch": 0.4, "learning_rate": 1.1928821099459804e-05, "loss": 0.674, "step": 8890 }, { "epoch": 0.4, "learning_rate": 1.1919742158064371e-05, "loss": 0.6832, "step": 8900 }, { "epoch": 0.4, "learning_rate": 1.1910663216668939e-05, "loss": 0.6639, "step": 8910 }, { "epoch": 0.4, "learning_rate": 1.1901584275273503e-05, "loss": 0.6672, "step": 8920 }, { "epoch": 0.41, "learning_rate": 1.189250533387807e-05, "loss": 0.6502, "step": 8930 }, { "epoch": 0.41, "learning_rate": 1.1883426392482638e-05, "loss": 0.6389, "step": 8940 }, { "epoch": 0.41, "learning_rate": 1.1874347451087204e-05, "loss": 0.6297, "step": 8950 }, { "epoch": 0.41, "learning_rate": 1.1865268509691772e-05, "loss": 0.6564, "step": 8960 }, { "epoch": 0.41, "learning_rate": 1.1856189568296337e-05, "loss": 0.66, "step": 8970 }, { "epoch": 0.41, "learning_rate": 1.1847110626900903e-05, "loss": 0.8164, "step": 8980 }, { "epoch": 0.41, "learning_rate": 1.183803168550547e-05, "loss": 0.6418, "step": 8990 }, { "epoch": 0.41, "learning_rate": 1.1828952744110038e-05, "loss": 0.6805, "step": 9000 }, { "epoch": 0.41, "eval_accuracy": 0.6139546112988894, "eval_loss": 0.6687760353088379, "eval_runtime": 74.2738, "eval_samples_per_second": 55.767, "eval_steps_per_second": 13.948, "step": 9000 }, { "epoch": 0.41, "learning_rate": 1.1819873802714606e-05, "loss": 0.6879, "step": 9010 }, { "epoch": 0.41, "learning_rate": 1.181079486131917e-05, "loss": 0.7094, "step": 9020 }, { "epoch": 0.41, "learning_rate": 1.1801715919923738e-05, "loss": 0.6699, "step": 9030 }, { "epoch": 0.41, "learning_rate": 1.1792636978528305e-05, "loss": 0.7035, "step": 9040 }, { "epoch": 0.41, "learning_rate": 1.1783558037132871e-05, "loss": 0.7768, "step": 9050 }, { "epoch": 0.41, "learning_rate": 1.1774479095737438e-05, "loss": 0.6312, "step": 9060 }, { "epoch": 0.41, "learning_rate": 1.1765400154342004e-05, "loss": 0.658, "step": 9070 }, { "epoch": 0.41, "learning_rate": 1.175632121294657e-05, "loss": 0.6699, "step": 9080 }, { "epoch": 0.41, "learning_rate": 1.1747242271551138e-05, "loss": 0.7354, "step": 9090 }, { "epoch": 0.41, "learning_rate": 1.1738163330155705e-05, "loss": 0.6662, "step": 9100 }, { "epoch": 0.41, "learning_rate": 1.1729084388760273e-05, "loss": 0.6908, "step": 9110 }, { "epoch": 0.41, "learning_rate": 1.1720005447364837e-05, "loss": 0.7621, "step": 9120 }, { "epoch": 0.41, "learning_rate": 1.1710926505969405e-05, "loss": 0.7016, "step": 9130 }, { "epoch": 0.41, "learning_rate": 1.1701847564573972e-05, "loss": 0.6984, "step": 9140 }, { "epoch": 0.42, "learning_rate": 1.1692768623178538e-05, "loss": 0.6508, "step": 9150 }, { "epoch": 0.42, "learning_rate": 1.1683689681783105e-05, "loss": 0.6754, "step": 9160 }, { "epoch": 0.42, "learning_rate": 1.1674610740387671e-05, "loss": 0.6082, "step": 9170 }, { "epoch": 0.42, "learning_rate": 1.1665531798992237e-05, "loss": 0.651, "step": 9180 }, { "epoch": 0.42, "learning_rate": 1.1656452857596805e-05, "loss": 0.7121, "step": 9190 }, { "epoch": 0.42, "learning_rate": 1.1647373916201372e-05, "loss": 0.793, "step": 9200 }, { "epoch": 0.42, "learning_rate": 1.163829497480594e-05, "loss": 0.5602, "step": 9210 }, { "epoch": 0.42, "learning_rate": 1.1629216033410504e-05, "loss": 0.6713, "step": 9220 }, { "epoch": 0.42, "learning_rate": 1.1620137092015071e-05, "loss": 0.7727, "step": 9230 }, { "epoch": 0.42, "learning_rate": 1.1611058150619639e-05, "loss": 0.6102, "step": 9240 }, { "epoch": 0.42, "learning_rate": 1.1601979209224205e-05, "loss": 0.6553, "step": 9250 }, { "epoch": 0.42, "learning_rate": 1.1592900267828772e-05, "loss": 0.6861, "step": 9260 }, { "epoch": 0.42, "learning_rate": 1.158382132643334e-05, "loss": 0.6576, "step": 9270 }, { "epoch": 0.42, "learning_rate": 1.1574742385037904e-05, "loss": 0.6504, "step": 9280 }, { "epoch": 0.42, "learning_rate": 1.1565663443642472e-05, "loss": 0.6322, "step": 9290 }, { "epoch": 0.42, "learning_rate": 1.155658450224704e-05, "loss": 0.6512, "step": 9300 }, { "epoch": 0.42, "learning_rate": 1.1547505560851607e-05, "loss": 0.6479, "step": 9310 }, { "epoch": 0.42, "learning_rate": 1.1538426619456173e-05, "loss": 0.6904, "step": 9320 }, { "epoch": 0.42, "learning_rate": 1.1529347678060738e-05, "loss": 0.7137, "step": 9330 }, { "epoch": 0.42, "learning_rate": 1.1520268736665306e-05, "loss": 0.6318, "step": 9340 }, { "epoch": 0.42, "learning_rate": 1.1511189795269872e-05, "loss": 0.7457, "step": 9350 }, { "epoch": 0.42, "learning_rate": 1.150211085387444e-05, "loss": 0.6027, "step": 9360 }, { "epoch": 0.43, "learning_rate": 1.1493031912479007e-05, "loss": 0.6516, "step": 9370 }, { "epoch": 0.43, "learning_rate": 1.1483952971083571e-05, "loss": 0.6967, "step": 9380 }, { "epoch": 0.43, "learning_rate": 1.1474874029688139e-05, "loss": 0.7232, "step": 9390 }, { "epoch": 0.43, "learning_rate": 1.1465795088292706e-05, "loss": 0.7355, "step": 9400 }, { "epoch": 0.43, "learning_rate": 1.1456716146897274e-05, "loss": 0.8191, "step": 9410 }, { "epoch": 0.43, "learning_rate": 1.144763720550184e-05, "loss": 0.6443, "step": 9420 }, { "epoch": 0.43, "learning_rate": 1.1438558264106405e-05, "loss": 0.6482, "step": 9430 }, { "epoch": 0.43, "learning_rate": 1.1429479322710973e-05, "loss": 0.7633, "step": 9440 }, { "epoch": 0.43, "learning_rate": 1.1420400381315539e-05, "loss": 0.6391, "step": 9450 }, { "epoch": 0.43, "learning_rate": 1.1411321439920106e-05, "loss": 0.7008, "step": 9460 }, { "epoch": 0.43, "learning_rate": 1.1402242498524674e-05, "loss": 0.6922, "step": 9470 }, { "epoch": 0.43, "learning_rate": 1.1393163557129238e-05, "loss": 0.5777, "step": 9480 }, { "epoch": 0.43, "learning_rate": 1.1384084615733806e-05, "loss": 0.6977, "step": 9490 }, { "epoch": 0.43, "learning_rate": 1.1375005674338373e-05, "loss": 0.5785, "step": 9500 }, { "epoch": 0.43, "eval_accuracy": 0.6129888942539836, "eval_loss": 0.6736442446708679, "eval_runtime": 72.7311, "eval_samples_per_second": 56.949, "eval_steps_per_second": 14.244, "step": 9500 }, { "epoch": 0.43, "learning_rate": 1.136592673294294e-05, "loss": 0.5643, "step": 9510 }, { "epoch": 0.43, "learning_rate": 1.1356847791547507e-05, "loss": 0.6428, "step": 9520 }, { "epoch": 0.43, "learning_rate": 1.1347768850152072e-05, "loss": 0.6371, "step": 9530 }, { "epoch": 0.43, "learning_rate": 1.133868990875664e-05, "loss": 0.633, "step": 9540 }, { "epoch": 0.43, "learning_rate": 1.1329610967361206e-05, "loss": 0.7713, "step": 9550 }, { "epoch": 0.43, "learning_rate": 1.1320532025965773e-05, "loss": 0.7596, "step": 9560 }, { "epoch": 0.43, "learning_rate": 1.131145308457034e-05, "loss": 0.635, "step": 9570 }, { "epoch": 0.43, "learning_rate": 1.1302374143174905e-05, "loss": 0.6986, "step": 9580 }, { "epoch": 0.44, "learning_rate": 1.1293295201779473e-05, "loss": 0.6287, "step": 9590 }, { "epoch": 0.44, "learning_rate": 1.128421626038404e-05, "loss": 0.6723, "step": 9600 }, { "epoch": 0.44, "learning_rate": 1.1275137318988608e-05, "loss": 0.5947, "step": 9610 }, { "epoch": 0.44, "learning_rate": 1.1266058377593173e-05, "loss": 0.757, "step": 9620 }, { "epoch": 0.44, "learning_rate": 1.1256979436197741e-05, "loss": 0.5848, "step": 9630 }, { "epoch": 0.44, "learning_rate": 1.1247900494802307e-05, "loss": 0.7131, "step": 9640 }, { "epoch": 0.44, "learning_rate": 1.1238821553406873e-05, "loss": 0.6102, "step": 9650 }, { "epoch": 0.44, "learning_rate": 1.122974261201144e-05, "loss": 0.7449, "step": 9660 }, { "epoch": 0.44, "learning_rate": 1.1220663670616008e-05, "loss": 0.6512, "step": 9670 }, { "epoch": 0.44, "learning_rate": 1.1211584729220575e-05, "loss": 0.6586, "step": 9680 }, { "epoch": 0.44, "learning_rate": 1.120250578782514e-05, "loss": 0.6793, "step": 9690 }, { "epoch": 0.44, "learning_rate": 1.1193426846429707e-05, "loss": 0.6764, "step": 9700 }, { "epoch": 0.44, "learning_rate": 1.1184347905034275e-05, "loss": 0.7092, "step": 9710 }, { "epoch": 0.44, "learning_rate": 1.117526896363884e-05, "loss": 0.6264, "step": 9720 }, { "epoch": 0.44, "learning_rate": 1.1166190022243408e-05, "loss": 0.6221, "step": 9730 }, { "epoch": 0.44, "learning_rate": 1.1157111080847974e-05, "loss": 0.6859, "step": 9740 }, { "epoch": 0.44, "learning_rate": 1.114803213945254e-05, "loss": 0.6084, "step": 9750 }, { "epoch": 0.44, "learning_rate": 1.1138953198057107e-05, "loss": 0.6539, "step": 9760 }, { "epoch": 0.44, "learning_rate": 1.1129874256661675e-05, "loss": 0.6131, "step": 9770 }, { "epoch": 0.44, "learning_rate": 1.1120795315266242e-05, "loss": 0.6937, "step": 9780 }, { "epoch": 0.44, "learning_rate": 1.1111716373870806e-05, "loss": 0.7943, "step": 9790 }, { "epoch": 0.44, "learning_rate": 1.1102637432475374e-05, "loss": 0.6664, "step": 9800 }, { "epoch": 0.45, "learning_rate": 1.1093558491079942e-05, "loss": 0.7092, "step": 9810 }, { "epoch": 0.45, "learning_rate": 1.1084479549684507e-05, "loss": 0.6586, "step": 9820 }, { "epoch": 0.45, "learning_rate": 1.1075400608289075e-05, "loss": 0.6746, "step": 9830 }, { "epoch": 0.45, "learning_rate": 1.106632166689364e-05, "loss": 0.6631, "step": 9840 }, { "epoch": 0.45, "learning_rate": 1.1057242725498207e-05, "loss": 0.6672, "step": 9850 }, { "epoch": 0.45, "learning_rate": 1.1048163784102774e-05, "loss": 0.6475, "step": 9860 }, { "epoch": 0.45, "learning_rate": 1.1039084842707342e-05, "loss": 0.7523, "step": 9870 }, { "epoch": 0.45, "learning_rate": 1.103000590131191e-05, "loss": 0.6203, "step": 9880 }, { "epoch": 0.45, "learning_rate": 1.1020926959916473e-05, "loss": 0.6355, "step": 9890 }, { "epoch": 0.45, "learning_rate": 1.1011848018521041e-05, "loss": 0.5984, "step": 9900 }, { "epoch": 0.45, "learning_rate": 1.1002769077125609e-05, "loss": 0.7189, "step": 9910 }, { "epoch": 0.45, "learning_rate": 1.0993690135730174e-05, "loss": 0.7266, "step": 9920 }, { "epoch": 0.45, "learning_rate": 1.0984611194334742e-05, "loss": 0.7332, "step": 9930 }, { "epoch": 0.45, "learning_rate": 1.0975532252939308e-05, "loss": 0.6303, "step": 9940 }, { "epoch": 0.45, "learning_rate": 1.0966453311543874e-05, "loss": 0.5779, "step": 9950 }, { "epoch": 0.45, "learning_rate": 1.0957374370148441e-05, "loss": 0.648, "step": 9960 }, { "epoch": 0.45, "learning_rate": 1.0948295428753009e-05, "loss": 0.6674, "step": 9970 }, { "epoch": 0.45, "learning_rate": 1.0939216487357576e-05, "loss": 0.7422, "step": 9980 }, { "epoch": 0.45, "learning_rate": 1.093013754596214e-05, "loss": 0.7232, "step": 9990 }, { "epoch": 0.45, "learning_rate": 1.0921058604566708e-05, "loss": 0.742, "step": 10000 }, { "epoch": 0.45, "eval_accuracy": 0.6108160309029455, "eval_loss": 0.6693569421768188, "eval_runtime": 72.9525, "eval_samples_per_second": 56.777, "eval_steps_per_second": 14.201, "step": 10000 }, { "epoch": 0.45, "learning_rate": 1.0911979663171275e-05, "loss": 0.5943, "step": 10010 }, { "epoch": 0.45, "learning_rate": 1.0902900721775841e-05, "loss": 0.6332, "step": 10020 }, { "epoch": 0.46, "learning_rate": 1.0893821780380409e-05, "loss": 0.7078, "step": 10030 }, { "epoch": 0.46, "learning_rate": 1.0884742838984976e-05, "loss": 0.6781, "step": 10040 }, { "epoch": 0.46, "learning_rate": 1.087566389758954e-05, "loss": 0.7174, "step": 10050 }, { "epoch": 0.46, "learning_rate": 1.0866584956194108e-05, "loss": 0.727, "step": 10060 }, { "epoch": 0.46, "learning_rate": 1.0857506014798676e-05, "loss": 0.6676, "step": 10070 }, { "epoch": 0.46, "learning_rate": 1.0848427073403243e-05, "loss": 0.6469, "step": 10080 }, { "epoch": 0.46, "learning_rate": 1.0839348132007809e-05, "loss": 0.7363, "step": 10090 }, { "epoch": 0.46, "learning_rate": 1.0830269190612375e-05, "loss": 0.768, "step": 10100 }, { "epoch": 0.46, "learning_rate": 1.0821190249216942e-05, "loss": 0.6092, "step": 10110 }, { "epoch": 0.46, "learning_rate": 1.0812111307821508e-05, "loss": 0.6672, "step": 10120 }, { "epoch": 0.46, "learning_rate": 1.0803032366426076e-05, "loss": 0.6822, "step": 10130 }, { "epoch": 0.46, "learning_rate": 1.0793953425030643e-05, "loss": 0.6617, "step": 10140 }, { "epoch": 0.46, "learning_rate": 1.0784874483635208e-05, "loss": 0.6945, "step": 10150 }, { "epoch": 0.46, "learning_rate": 1.0775795542239775e-05, "loss": 0.6867, "step": 10160 }, { "epoch": 0.46, "learning_rate": 1.0766716600844343e-05, "loss": 0.6332, "step": 10170 }, { "epoch": 0.46, "learning_rate": 1.075763765944891e-05, "loss": 0.6752, "step": 10180 }, { "epoch": 0.46, "learning_rate": 1.0748558718053476e-05, "loss": 0.6369, "step": 10190 }, { "epoch": 0.46, "learning_rate": 1.0739479776658042e-05, "loss": 0.6684, "step": 10200 }, { "epoch": 0.46, "learning_rate": 1.073040083526261e-05, "loss": 0.5461, "step": 10210 }, { "epoch": 0.46, "learning_rate": 1.0721321893867175e-05, "loss": 0.7184, "step": 10220 }, { "epoch": 0.46, "learning_rate": 1.0712242952471743e-05, "loss": 0.7496, "step": 10230 }, { "epoch": 0.46, "learning_rate": 1.070316401107631e-05, "loss": 0.6715, "step": 10240 }, { "epoch": 0.47, "learning_rate": 1.0694085069680874e-05, "loss": 0.6252, "step": 10250 }, { "epoch": 0.47, "learning_rate": 1.0685006128285442e-05, "loss": 0.6961, "step": 10260 }, { "epoch": 0.47, "learning_rate": 1.067592718689001e-05, "loss": 0.716, "step": 10270 }, { "epoch": 0.47, "learning_rate": 1.0666848245494577e-05, "loss": 0.7164, "step": 10280 }, { "epoch": 0.47, "learning_rate": 1.0657769304099143e-05, "loss": 0.6246, "step": 10290 }, { "epoch": 0.47, "learning_rate": 1.0648690362703709e-05, "loss": 0.6184, "step": 10300 }, { "epoch": 0.47, "learning_rate": 1.0639611421308276e-05, "loss": 0.6963, "step": 10310 }, { "epoch": 0.47, "learning_rate": 1.0630532479912842e-05, "loss": 0.6121, "step": 10320 }, { "epoch": 0.47, "learning_rate": 1.062145353851741e-05, "loss": 0.652, "step": 10330 }, { "epoch": 0.47, "learning_rate": 1.0612374597121977e-05, "loss": 0.6271, "step": 10340 }, { "epoch": 0.47, "learning_rate": 1.0603295655726541e-05, "loss": 0.624, "step": 10350 }, { "epoch": 0.47, "learning_rate": 1.0594216714331109e-05, "loss": 0.6789, "step": 10360 }, { "epoch": 0.47, "learning_rate": 1.0585137772935677e-05, "loss": 0.615, "step": 10370 }, { "epoch": 0.47, "learning_rate": 1.0576058831540244e-05, "loss": 0.6789, "step": 10380 }, { "epoch": 0.47, "learning_rate": 1.056697989014481e-05, "loss": 0.6871, "step": 10390 }, { "epoch": 0.47, "learning_rate": 1.0557900948749377e-05, "loss": 0.6904, "step": 10400 }, { "epoch": 0.47, "learning_rate": 1.0548822007353943e-05, "loss": 0.6643, "step": 10410 }, { "epoch": 0.47, "learning_rate": 1.053974306595851e-05, "loss": 0.6654, "step": 10420 }, { "epoch": 0.47, "learning_rate": 1.0530664124563077e-05, "loss": 0.7045, "step": 10430 }, { "epoch": 0.47, "learning_rate": 1.0521585183167644e-05, "loss": 0.6736, "step": 10440 }, { "epoch": 0.47, "learning_rate": 1.0512506241772212e-05, "loss": 0.7609, "step": 10450 }, { "epoch": 0.47, "learning_rate": 1.0503427300376776e-05, "loss": 0.6813, "step": 10460 }, { "epoch": 0.48, "learning_rate": 1.0494348358981343e-05, "loss": 0.6705, "step": 10470 }, { "epoch": 0.48, "learning_rate": 1.0485269417585911e-05, "loss": 0.6914, "step": 10480 }, { "epoch": 0.48, "learning_rate": 1.0476190476190477e-05, "loss": 0.7188, "step": 10490 }, { "epoch": 0.48, "learning_rate": 1.0467111534795044e-05, "loss": 0.6529, "step": 10500 }, { "epoch": 0.48, "eval_accuracy": 0.6166103331723805, "eval_loss": 0.6667417883872986, "eval_runtime": 72.4713, "eval_samples_per_second": 57.154, "eval_steps_per_second": 14.295, "step": 10500 }, { "epoch": 0.48, "learning_rate": 1.045803259339961e-05, "loss": 0.6578, "step": 10510 }, { "epoch": 0.48, "learning_rate": 1.0448953652004176e-05, "loss": 0.7453, "step": 10520 }, { "epoch": 0.48, "learning_rate": 1.0439874710608744e-05, "loss": 0.6691, "step": 10530 }, { "epoch": 0.48, "learning_rate": 1.0430795769213311e-05, "loss": 0.6914, "step": 10540 }, { "epoch": 0.48, "learning_rate": 1.0421716827817879e-05, "loss": 0.7102, "step": 10550 }, { "epoch": 0.48, "learning_rate": 1.0412637886422443e-05, "loss": 0.6609, "step": 10560 }, { "epoch": 0.48, "learning_rate": 1.040355894502701e-05, "loss": 0.6842, "step": 10570 }, { "epoch": 0.48, "learning_rate": 1.0394480003631578e-05, "loss": 0.6832, "step": 10580 }, { "epoch": 0.48, "learning_rate": 1.0385401062236144e-05, "loss": 0.6773, "step": 10590 }, { "epoch": 0.48, "learning_rate": 1.0376322120840711e-05, "loss": 0.6449, "step": 10600 }, { "epoch": 0.48, "learning_rate": 1.0367243179445277e-05, "loss": 0.6217, "step": 10610 }, { "epoch": 0.48, "learning_rate": 1.0358164238049843e-05, "loss": 0.6234, "step": 10620 }, { "epoch": 0.48, "learning_rate": 1.034908529665441e-05, "loss": 0.5896, "step": 10630 }, { "epoch": 0.48, "learning_rate": 1.0340006355258978e-05, "loss": 0.6473, "step": 10640 }, { "epoch": 0.48, "learning_rate": 1.0330927413863546e-05, "loss": 0.7066, "step": 10650 }, { "epoch": 0.48, "learning_rate": 1.032184847246811e-05, "loss": 0.5883, "step": 10660 }, { "epoch": 0.48, "learning_rate": 1.0312769531072677e-05, "loss": 0.6541, "step": 10670 }, { "epoch": 0.48, "learning_rate": 1.0303690589677245e-05, "loss": 0.6373, "step": 10680 }, { "epoch": 0.49, "learning_rate": 1.029461164828181e-05, "loss": 0.7643, "step": 10690 }, { "epoch": 0.49, "learning_rate": 1.0285532706886378e-05, "loss": 0.6492, "step": 10700 }, { "epoch": 0.49, "learning_rate": 1.0276453765490944e-05, "loss": 0.7371, "step": 10710 }, { "epoch": 0.49, "learning_rate": 1.026737482409551e-05, "loss": 0.698, "step": 10720 }, { "epoch": 0.49, "learning_rate": 1.0258295882700078e-05, "loss": 0.7586, "step": 10730 }, { "epoch": 0.49, "learning_rate": 1.0249216941304645e-05, "loss": 0.6699, "step": 10740 }, { "epoch": 0.49, "learning_rate": 1.0240137999909213e-05, "loss": 0.708, "step": 10750 }, { "epoch": 0.49, "learning_rate": 1.0231059058513779e-05, "loss": 0.6816, "step": 10760 }, { "epoch": 0.49, "learning_rate": 1.0221980117118344e-05, "loss": 0.6832, "step": 10770 }, { "epoch": 0.49, "learning_rate": 1.0212901175722912e-05, "loss": 0.7, "step": 10780 }, { "epoch": 0.49, "learning_rate": 1.0203822234327478e-05, "loss": 0.6459, "step": 10790 }, { "epoch": 0.49, "learning_rate": 1.0194743292932045e-05, "loss": 0.6645, "step": 10800 }, { "epoch": 0.49, "learning_rate": 1.0185664351536613e-05, "loss": 0.6486, "step": 10810 }, { "epoch": 0.49, "learning_rate": 1.0176585410141177e-05, "loss": 0.6078, "step": 10820 }, { "epoch": 0.49, "learning_rate": 1.0167506468745745e-05, "loss": 0.6418, "step": 10830 }, { "epoch": 0.49, "learning_rate": 1.0158427527350312e-05, "loss": 0.6291, "step": 10840 }, { "epoch": 0.49, "learning_rate": 1.014934858595488e-05, "loss": 0.6855, "step": 10850 }, { "epoch": 0.49, "learning_rate": 1.0140269644559445e-05, "loss": 0.6988, "step": 10860 }, { "epoch": 0.49, "learning_rate": 1.0131190703164011e-05, "loss": 0.6133, "step": 10870 }, { "epoch": 0.49, "learning_rate": 1.0122111761768579e-05, "loss": 0.607, "step": 10880 }, { "epoch": 0.49, "learning_rate": 1.0113032820373145e-05, "loss": 0.7082, "step": 10890 }, { "epoch": 0.49, "learning_rate": 1.0103953878977712e-05, "loss": 0.6539, "step": 10900 }, { "epoch": 0.5, "learning_rate": 1.009487493758228e-05, "loss": 0.6391, "step": 10910 }, { "epoch": 0.5, "learning_rate": 1.0085795996186844e-05, "loss": 0.7664, "step": 10920 }, { "epoch": 0.5, "learning_rate": 1.0076717054791412e-05, "loss": 0.7229, "step": 10930 }, { "epoch": 0.5, "learning_rate": 1.0067638113395979e-05, "loss": 0.6549, "step": 10940 }, { "epoch": 0.5, "learning_rate": 1.0058559172000547e-05, "loss": 0.7496, "step": 10950 }, { "epoch": 0.5, "learning_rate": 1.0049480230605112e-05, "loss": 0.743, "step": 10960 }, { "epoch": 0.5, "learning_rate": 1.0040401289209678e-05, "loss": 0.7393, "step": 10970 }, { "epoch": 0.5, "learning_rate": 1.0031322347814246e-05, "loss": 0.7133, "step": 10980 }, { "epoch": 0.5, "learning_rate": 1.0022243406418812e-05, "loss": 0.6711, "step": 10990 }, { "epoch": 0.5, "learning_rate": 1.001316446502338e-05, "loss": 0.6803, "step": 11000 }, { "epoch": 0.5, "eval_accuracy": 0.6214389183969097, "eval_loss": 0.6656657457351685, "eval_runtime": 72.6193, "eval_samples_per_second": 57.037, "eval_steps_per_second": 14.266, "step": 11000 }, { "epoch": 0.5, "learning_rate": 1.0004085523627947e-05, "loss": 0.6867, "step": 11010 }, { "epoch": 0.5, "learning_rate": 9.995006582232513e-06, "loss": 0.6211, "step": 11020 }, { "epoch": 0.5, "learning_rate": 9.985927640837078e-06, "loss": 0.7129, "step": 11030 }, { "epoch": 0.5, "learning_rate": 9.976848699441646e-06, "loss": 0.7109, "step": 11040 }, { "epoch": 0.5, "learning_rate": 9.967769758046214e-06, "loss": 0.6867, "step": 11050 }, { "epoch": 0.5, "learning_rate": 9.95869081665078e-06, "loss": 0.6592, "step": 11060 }, { "epoch": 0.5, "learning_rate": 9.949611875255345e-06, "loss": 0.6264, "step": 11070 }, { "epoch": 0.5, "learning_rate": 9.940532933859913e-06, "loss": 0.6008, "step": 11080 }, { "epoch": 0.5, "learning_rate": 9.931453992464479e-06, "loss": 0.684, "step": 11090 }, { "epoch": 0.5, "learning_rate": 9.922375051069046e-06, "loss": 0.6029, "step": 11100 }, { "epoch": 0.5, "learning_rate": 9.913296109673612e-06, "loss": 0.6953, "step": 11110 }, { "epoch": 0.5, "learning_rate": 9.90421716827818e-06, "loss": 0.6785, "step": 11120 }, { "epoch": 0.51, "learning_rate": 9.895138226882747e-06, "loss": 0.5875, "step": 11130 }, { "epoch": 0.51, "learning_rate": 9.886059285487313e-06, "loss": 0.6793, "step": 11140 }, { "epoch": 0.51, "learning_rate": 9.87698034409188e-06, "loss": 0.6586, "step": 11150 }, { "epoch": 0.51, "learning_rate": 9.867901402696446e-06, "loss": 0.607, "step": 11160 }, { "epoch": 0.51, "learning_rate": 9.858822461301012e-06, "loss": 0.5693, "step": 11170 }, { "epoch": 0.51, "learning_rate": 9.84974351990558e-06, "loss": 0.6781, "step": 11180 }, { "epoch": 0.51, "learning_rate": 9.840664578510146e-06, "loss": 0.6658, "step": 11190 }, { "epoch": 0.51, "learning_rate": 9.831585637114713e-06, "loss": 0.7047, "step": 11200 }, { "epoch": 0.51, "learning_rate": 9.822506695719279e-06, "loss": 0.7633, "step": 11210 }, { "epoch": 0.51, "learning_rate": 9.813427754323847e-06, "loss": 0.6619, "step": 11220 }, { "epoch": 0.51, "learning_rate": 9.804348812928414e-06, "loss": 0.6832, "step": 11230 }, { "epoch": 0.51, "learning_rate": 9.79526987153298e-06, "loss": 0.6477, "step": 11240 }, { "epoch": 0.51, "learning_rate": 9.786190930137547e-06, "loss": 0.6107, "step": 11250 }, { "epoch": 0.51, "learning_rate": 9.777111988742113e-06, "loss": 0.658, "step": 11260 }, { "epoch": 0.51, "learning_rate": 9.76803304734668e-06, "loss": 0.6137, "step": 11270 }, { "epoch": 0.51, "learning_rate": 9.758954105951247e-06, "loss": 0.5883, "step": 11280 }, { "epoch": 0.51, "learning_rate": 9.749875164555813e-06, "loss": 0.683, "step": 11290 }, { "epoch": 0.51, "learning_rate": 9.74079622316038e-06, "loss": 0.6953, "step": 11300 }, { "epoch": 0.51, "learning_rate": 9.731717281764948e-06, "loss": 0.5729, "step": 11310 }, { "epoch": 0.51, "learning_rate": 9.722638340369514e-06, "loss": 0.7637, "step": 11320 }, { "epoch": 0.51, "learning_rate": 9.713559398974081e-06, "loss": 0.7246, "step": 11330 }, { "epoch": 0.51, "learning_rate": 9.704480457578647e-06, "loss": 0.6527, "step": 11340 }, { "epoch": 0.52, "learning_rate": 9.695401516183214e-06, "loss": 0.7195, "step": 11350 }, { "epoch": 0.52, "learning_rate": 9.68632257478778e-06, "loss": 0.6072, "step": 11360 }, { "epoch": 0.52, "learning_rate": 9.677243633392346e-06, "loss": 0.6926, "step": 11370 }, { "epoch": 0.52, "learning_rate": 9.668164691996914e-06, "loss": 0.649, "step": 11380 }, { "epoch": 0.52, "learning_rate": 9.65908575060148e-06, "loss": 0.5658, "step": 11390 }, { "epoch": 0.52, "learning_rate": 9.650006809206047e-06, "loss": 0.7312, "step": 11400 }, { "epoch": 0.52, "learning_rate": 9.640927867810615e-06, "loss": 0.6996, "step": 11410 }, { "epoch": 0.52, "learning_rate": 9.63184892641518e-06, "loss": 0.6453, "step": 11420 }, { "epoch": 0.52, "learning_rate": 9.622769985019748e-06, "loss": 0.6148, "step": 11430 }, { "epoch": 0.52, "learning_rate": 9.613691043624314e-06, "loss": 0.734, "step": 11440 }, { "epoch": 0.52, "learning_rate": 9.604612102228881e-06, "loss": 0.633, "step": 11450 }, { "epoch": 0.52, "learning_rate": 9.595533160833447e-06, "loss": 0.6434, "step": 11460 }, { "epoch": 0.52, "learning_rate": 9.586454219438013e-06, "loss": 0.6508, "step": 11470 }, { "epoch": 0.52, "learning_rate": 9.57737527804258e-06, "loss": 0.7098, "step": 11480 }, { "epoch": 0.52, "learning_rate": 9.568296336647148e-06, "loss": 0.6662, "step": 11490 }, { "epoch": 0.52, "learning_rate": 9.559217395251714e-06, "loss": 0.6588, "step": 11500 }, { "epoch": 0.52, "eval_accuracy": 0.6219217769193627, "eval_loss": 0.6640219688415527, "eval_runtime": 74.4423, "eval_samples_per_second": 55.64, "eval_steps_per_second": 13.917, "step": 11500 }, { "epoch": 0.52, "learning_rate": 9.550138453856282e-06, "loss": 0.7092, "step": 11510 }, { "epoch": 0.52, "learning_rate": 9.541059512460847e-06, "loss": 0.6582, "step": 11520 }, { "epoch": 0.52, "learning_rate": 9.531980571065415e-06, "loss": 0.6588, "step": 11530 }, { "epoch": 0.52, "learning_rate": 9.52290162966998e-06, "loss": 0.7258, "step": 11540 }, { "epoch": 0.52, "learning_rate": 9.513822688274548e-06, "loss": 0.7121, "step": 11550 }, { "epoch": 0.52, "learning_rate": 9.504743746879114e-06, "loss": 0.6375, "step": 11560 }, { "epoch": 0.53, "learning_rate": 9.49566480548368e-06, "loss": 0.6449, "step": 11570 }, { "epoch": 0.53, "learning_rate": 9.486585864088248e-06, "loss": 0.7689, "step": 11580 }, { "epoch": 0.53, "learning_rate": 9.477506922692815e-06, "loss": 0.6449, "step": 11590 }, { "epoch": 0.53, "learning_rate": 9.468427981297381e-06, "loss": 0.7828, "step": 11600 }, { "epoch": 0.53, "learning_rate": 9.459349039901949e-06, "loss": 0.5143, "step": 11610 }, { "epoch": 0.53, "learning_rate": 9.450270098506516e-06, "loss": 0.6818, "step": 11620 }, { "epoch": 0.53, "learning_rate": 9.441191157111082e-06, "loss": 0.6859, "step": 11630 }, { "epoch": 0.53, "learning_rate": 9.432112215715648e-06, "loss": 0.5936, "step": 11640 }, { "epoch": 0.53, "learning_rate": 9.423033274320215e-06, "loss": 0.6316, "step": 11650 }, { "epoch": 0.53, "learning_rate": 9.413954332924781e-06, "loss": 0.6494, "step": 11660 }, { "epoch": 0.53, "learning_rate": 9.404875391529349e-06, "loss": 0.6939, "step": 11670 }, { "epoch": 0.53, "learning_rate": 9.395796450133915e-06, "loss": 0.6666, "step": 11680 }, { "epoch": 0.53, "learning_rate": 9.386717508738482e-06, "loss": 0.6594, "step": 11690 }, { "epoch": 0.53, "learning_rate": 9.377638567343048e-06, "loss": 0.6082, "step": 11700 }, { "epoch": 0.53, "learning_rate": 9.368559625947616e-06, "loss": 0.6211, "step": 11710 }, { "epoch": 0.53, "learning_rate": 9.359480684552183e-06, "loss": 0.7973, "step": 11720 }, { "epoch": 0.53, "learning_rate": 9.350401743156749e-06, "loss": 0.7477, "step": 11730 }, { "epoch": 0.53, "learning_rate": 9.341322801761315e-06, "loss": 0.5236, "step": 11740 }, { "epoch": 0.53, "learning_rate": 9.332243860365882e-06, "loss": 0.6607, "step": 11750 }, { "epoch": 0.53, "learning_rate": 9.323164918970448e-06, "loss": 0.6092, "step": 11760 }, { "epoch": 0.53, "learning_rate": 9.314085977575016e-06, "loss": 0.6771, "step": 11770 }, { "epoch": 0.53, "learning_rate": 9.305007036179582e-06, "loss": 0.6631, "step": 11780 }, { "epoch": 0.54, "learning_rate": 9.295928094784149e-06, "loss": 0.6256, "step": 11790 }, { "epoch": 0.54, "learning_rate": 9.286849153388717e-06, "loss": 0.7223, "step": 11800 }, { "epoch": 0.54, "learning_rate": 9.277770211993282e-06, "loss": 0.759, "step": 11810 }, { "epoch": 0.54, "learning_rate": 9.26869127059785e-06, "loss": 0.6131, "step": 11820 }, { "epoch": 0.54, "learning_rate": 9.259612329202416e-06, "loss": 0.6332, "step": 11830 }, { "epoch": 0.54, "learning_rate": 9.250533387806982e-06, "loss": 0.6002, "step": 11840 }, { "epoch": 0.54, "learning_rate": 9.24145444641155e-06, "loss": 0.5533, "step": 11850 }, { "epoch": 0.54, "learning_rate": 9.232375505016115e-06, "loss": 0.6191, "step": 11860 }, { "epoch": 0.54, "learning_rate": 9.223296563620683e-06, "loss": 0.7561, "step": 11870 }, { "epoch": 0.54, "learning_rate": 9.214217622225248e-06, "loss": 0.6852, "step": 11880 }, { "epoch": 0.54, "learning_rate": 9.205138680829816e-06, "loss": 0.7559, "step": 11890 }, { "epoch": 0.54, "learning_rate": 9.196059739434384e-06, "loss": 0.6805, "step": 11900 }, { "epoch": 0.54, "learning_rate": 9.18698079803895e-06, "loss": 0.6937, "step": 11910 }, { "epoch": 0.54, "learning_rate": 9.177901856643517e-06, "loss": 0.766, "step": 11920 }, { "epoch": 0.54, "learning_rate": 9.168822915248083e-06, "loss": 0.6945, "step": 11930 }, { "epoch": 0.54, "learning_rate": 9.159743973852649e-06, "loss": 0.5963, "step": 11940 }, { "epoch": 0.54, "learning_rate": 9.150665032457216e-06, "loss": 0.5949, "step": 11950 }, { "epoch": 0.54, "learning_rate": 9.141586091061782e-06, "loss": 0.7, "step": 11960 }, { "epoch": 0.54, "learning_rate": 9.13250714966635e-06, "loss": 0.6602, "step": 11970 }, { "epoch": 0.54, "learning_rate": 9.123428208270917e-06, "loss": 0.6168, "step": 11980 }, { "epoch": 0.54, "learning_rate": 9.114349266875483e-06, "loss": 0.65, "step": 11990 }, { "epoch": 0.54, "learning_rate": 9.10527032548005e-06, "loss": 0.5652, "step": 12000 }, { "epoch": 0.54, "eval_accuracy": 0.6262675036214389, "eval_loss": 0.6643086671829224, "eval_runtime": 74.1561, "eval_samples_per_second": 55.855, "eval_steps_per_second": 13.971, "step": 12000 }, { "epoch": 0.55, "learning_rate": 9.096191384084616e-06, "loss": 0.5828, "step": 12010 }, { "epoch": 0.55, "learning_rate": 9.087112442689184e-06, "loss": 0.6129, "step": 12020 }, { "epoch": 0.55, "learning_rate": 9.07803350129375e-06, "loss": 0.6373, "step": 12030 }, { "epoch": 0.55, "learning_rate": 9.068954559898316e-06, "loss": 0.5529, "step": 12040 }, { "epoch": 0.55, "learning_rate": 9.059875618502883e-06, "loss": 0.5791, "step": 12050 }, { "epoch": 0.55, "learning_rate": 9.050796677107449e-06, "loss": 0.7039, "step": 12060 }, { "epoch": 0.55, "learning_rate": 9.041717735712017e-06, "loss": 0.6314, "step": 12070 }, { "epoch": 0.55, "learning_rate": 9.032638794316584e-06, "loss": 0.8344, "step": 12080 }, { "epoch": 0.55, "learning_rate": 9.02355985292115e-06, "loss": 0.6568, "step": 12090 }, { "epoch": 0.55, "learning_rate": 9.014480911525718e-06, "loss": 0.6854, "step": 12100 }, { "epoch": 0.55, "learning_rate": 9.005401970130283e-06, "loss": 0.7133, "step": 12110 }, { "epoch": 0.55, "learning_rate": 8.996323028734851e-06, "loss": 0.59, "step": 12120 }, { "epoch": 0.55, "learning_rate": 8.987244087339417e-06, "loss": 0.7049, "step": 12130 }, { "epoch": 0.55, "learning_rate": 8.978165145943983e-06, "loss": 0.5949, "step": 12140 }, { "epoch": 0.55, "learning_rate": 8.96908620454855e-06, "loss": 0.6137, "step": 12150 }, { "epoch": 0.55, "learning_rate": 8.960007263153116e-06, "loss": 0.8039, "step": 12160 }, { "epoch": 0.55, "learning_rate": 8.950928321757684e-06, "loss": 0.5955, "step": 12170 }, { "epoch": 0.55, "learning_rate": 8.941849380362251e-06, "loss": 0.558, "step": 12180 }, { "epoch": 0.55, "learning_rate": 8.932770438966817e-06, "loss": 0.6453, "step": 12190 }, { "epoch": 0.55, "learning_rate": 8.923691497571384e-06, "loss": 0.5607, "step": 12200 }, { "epoch": 0.55, "learning_rate": 8.91461255617595e-06, "loss": 0.6797, "step": 12210 }, { "epoch": 0.55, "learning_rate": 8.905533614780518e-06, "loss": 0.6105, "step": 12220 }, { "epoch": 0.56, "learning_rate": 8.896454673385084e-06, "loss": 0.607, "step": 12230 }, { "epoch": 0.56, "learning_rate": 8.88737573198965e-06, "loss": 0.7182, "step": 12240 }, { "epoch": 0.56, "learning_rate": 8.878296790594217e-06, "loss": 0.6537, "step": 12250 }, { "epoch": 0.56, "learning_rate": 8.869217849198785e-06, "loss": 0.8063, "step": 12260 }, { "epoch": 0.56, "learning_rate": 8.86013890780335e-06, "loss": 0.5801, "step": 12270 }, { "epoch": 0.56, "learning_rate": 8.851059966407918e-06, "loss": 0.707, "step": 12280 }, { "epoch": 0.56, "learning_rate": 8.841981025012484e-06, "loss": 0.7332, "step": 12290 }, { "epoch": 0.56, "learning_rate": 8.832902083617051e-06, "loss": 0.7797, "step": 12300 }, { "epoch": 0.56, "learning_rate": 8.823823142221617e-06, "loss": 0.7146, "step": 12310 }, { "epoch": 0.56, "learning_rate": 8.814744200826185e-06, "loss": 0.667, "step": 12320 }, { "epoch": 0.56, "learning_rate": 8.80566525943075e-06, "loss": 0.7193, "step": 12330 }, { "epoch": 0.56, "learning_rate": 8.796586318035317e-06, "loss": 0.7027, "step": 12340 }, { "epoch": 0.56, "learning_rate": 8.787507376639884e-06, "loss": 0.6092, "step": 12350 }, { "epoch": 0.56, "learning_rate": 8.778428435244452e-06, "loss": 0.6668, "step": 12360 }, { "epoch": 0.56, "learning_rate": 8.769349493849017e-06, "loss": 0.7051, "step": 12370 }, { "epoch": 0.56, "learning_rate": 8.760270552453585e-06, "loss": 0.7043, "step": 12380 }, { "epoch": 0.56, "learning_rate": 8.751191611058153e-06, "loss": 0.7746, "step": 12390 }, { "epoch": 0.56, "learning_rate": 8.742112669662718e-06, "loss": 0.5826, "step": 12400 }, { "epoch": 0.56, "learning_rate": 8.733033728267284e-06, "loss": 0.6295, "step": 12410 }, { "epoch": 0.56, "learning_rate": 8.723954786871852e-06, "loss": 0.6553, "step": 12420 }, { "epoch": 0.56, "learning_rate": 8.714875845476418e-06, "loss": 0.6766, "step": 12430 }, { "epoch": 0.56, "learning_rate": 8.705796904080985e-06, "loss": 0.5719, "step": 12440 }, { "epoch": 0.57, "learning_rate": 8.696717962685551e-06, "loss": 0.6652, "step": 12450 }, { "epoch": 0.57, "learning_rate": 8.687639021290119e-06, "loss": 0.7088, "step": 12460 }, { "epoch": 0.57, "learning_rate": 8.678560079894684e-06, "loss": 0.6367, "step": 12470 }, { "epoch": 0.57, "learning_rate": 8.669481138499252e-06, "loss": 0.7176, "step": 12480 }, { "epoch": 0.57, "learning_rate": 8.66040219710382e-06, "loss": 0.8223, "step": 12490 }, { "epoch": 0.57, "learning_rate": 8.651323255708385e-06, "loss": 0.7406, "step": 12500 }, { "epoch": 0.57, "eval_accuracy": 0.6284403669724771, "eval_loss": 0.6642463803291321, "eval_runtime": 74.8866, "eval_samples_per_second": 55.31, "eval_steps_per_second": 13.834, "step": 12500 }, { "epoch": 0.57, "learning_rate": 8.642244314312951e-06, "loss": 0.7641, "step": 12510 }, { "epoch": 0.57, "learning_rate": 8.633165372917519e-06, "loss": 0.6031, "step": 12520 }, { "epoch": 0.57, "learning_rate": 8.624086431522085e-06, "loss": 0.6365, "step": 12530 }, { "epoch": 0.57, "learning_rate": 8.615007490126652e-06, "loss": 0.5977, "step": 12540 }, { "epoch": 0.57, "learning_rate": 8.605928548731218e-06, "loss": 0.6783, "step": 12550 }, { "epoch": 0.57, "learning_rate": 8.596849607335786e-06, "loss": 0.7094, "step": 12560 }, { "epoch": 0.57, "learning_rate": 8.587770665940353e-06, "loss": 0.6758, "step": 12570 }, { "epoch": 0.57, "learning_rate": 8.578691724544919e-06, "loss": 0.7918, "step": 12580 }, { "epoch": 0.57, "learning_rate": 8.569612783149486e-06, "loss": 0.6793, "step": 12590 }, { "epoch": 0.57, "learning_rate": 8.560533841754052e-06, "loss": 0.7588, "step": 12600 }, { "epoch": 0.57, "learning_rate": 8.551454900358618e-06, "loss": 0.6875, "step": 12610 }, { "epoch": 0.57, "learning_rate": 8.542375958963186e-06, "loss": 0.5982, "step": 12620 }, { "epoch": 0.57, "learning_rate": 8.533297017567752e-06, "loss": 0.7289, "step": 12630 }, { "epoch": 0.57, "learning_rate": 8.524218076172319e-06, "loss": 0.641, "step": 12640 }, { "epoch": 0.57, "learning_rate": 8.515139134776885e-06, "loss": 0.6434, "step": 12650 }, { "epoch": 0.57, "learning_rate": 8.506060193381452e-06, "loss": 0.6051, "step": 12660 }, { "epoch": 0.58, "learning_rate": 8.49698125198602e-06, "loss": 0.7082, "step": 12670 }, { "epoch": 0.58, "learning_rate": 8.487902310590586e-06, "loss": 0.5811, "step": 12680 }, { "epoch": 0.58, "learning_rate": 8.478823369195153e-06, "loss": 0.573, "step": 12690 }, { "epoch": 0.58, "learning_rate": 8.46974442779972e-06, "loss": 0.6701, "step": 12700 }, { "epoch": 0.58, "learning_rate": 8.460665486404285e-06, "loss": 0.6824, "step": 12710 }, { "epoch": 0.58, "learning_rate": 8.451586545008853e-06, "loss": 0.5939, "step": 12720 }, { "epoch": 0.58, "learning_rate": 8.442507603613419e-06, "loss": 0.5402, "step": 12730 }, { "epoch": 0.58, "learning_rate": 8.433428662217986e-06, "loss": 0.7176, "step": 12740 }, { "epoch": 0.58, "learning_rate": 8.424349720822554e-06, "loss": 0.7102, "step": 12750 }, { "epoch": 0.58, "learning_rate": 8.41527077942712e-06, "loss": 0.7031, "step": 12760 }, { "epoch": 0.58, "learning_rate": 8.406191838031687e-06, "loss": 0.6232, "step": 12770 }, { "epoch": 0.58, "learning_rate": 8.397112896636253e-06, "loss": 0.7289, "step": 12780 }, { "epoch": 0.58, "learning_rate": 8.38803395524082e-06, "loss": 0.6281, "step": 12790 }, { "epoch": 0.58, "learning_rate": 8.378955013845386e-06, "loss": 0.6742, "step": 12800 }, { "epoch": 0.58, "learning_rate": 8.369876072449952e-06, "loss": 0.5912, "step": 12810 }, { "epoch": 0.58, "learning_rate": 8.36079713105452e-06, "loss": 0.6687, "step": 12820 }, { "epoch": 0.58, "learning_rate": 8.351718189659085e-06, "loss": 0.6396, "step": 12830 }, { "epoch": 0.58, "learning_rate": 8.342639248263653e-06, "loss": 0.6611, "step": 12840 }, { "epoch": 0.58, "learning_rate": 8.33356030686822e-06, "loss": 0.7992, "step": 12850 }, { "epoch": 0.58, "learning_rate": 8.324481365472786e-06, "loss": 0.5539, "step": 12860 }, { "epoch": 0.58, "learning_rate": 8.315402424077354e-06, "loss": 0.6166, "step": 12870 }, { "epoch": 0.58, "learning_rate": 8.30632348268192e-06, "loss": 0.6248, "step": 12880 }, { "epoch": 0.59, "learning_rate": 8.297244541286487e-06, "loss": 0.6965, "step": 12890 }, { "epoch": 0.59, "learning_rate": 8.288165599891053e-06, "loss": 0.6883, "step": 12900 }, { "epoch": 0.59, "learning_rate": 8.279086658495619e-06, "loss": 0.6459, "step": 12910 }, { "epoch": 0.59, "learning_rate": 8.270007717100187e-06, "loss": 0.6266, "step": 12920 }, { "epoch": 0.59, "learning_rate": 8.260928775704754e-06, "loss": 0.6408, "step": 12930 }, { "epoch": 0.59, "learning_rate": 8.25184983430932e-06, "loss": 0.6605, "step": 12940 }, { "epoch": 0.59, "learning_rate": 8.242770892913888e-06, "loss": 0.6299, "step": 12950 }, { "epoch": 0.59, "learning_rate": 8.233691951518453e-06, "loss": 0.7291, "step": 12960 }, { "epoch": 0.59, "learning_rate": 8.224613010123021e-06, "loss": 0.5949, "step": 12970 }, { "epoch": 0.59, "learning_rate": 8.215534068727587e-06, "loss": 0.6488, "step": 12980 }, { "epoch": 0.59, "learning_rate": 8.206455127332154e-06, "loss": 0.7426, "step": 12990 }, { "epoch": 0.59, "learning_rate": 8.19737618593672e-06, "loss": 0.6742, "step": 13000 }, { "epoch": 0.59, "eval_accuracy": 0.6238532110091743, "eval_loss": 0.6639795303344727, "eval_runtime": 73.0379, "eval_samples_per_second": 56.71, "eval_steps_per_second": 14.184, "step": 13000 }, { "epoch": 0.59, "learning_rate": 8.188297244541286e-06, "loss": 0.6525, "step": 13010 }, { "epoch": 0.59, "learning_rate": 8.179218303145854e-06, "loss": 0.7201, "step": 13020 }, { "epoch": 0.59, "learning_rate": 8.170139361750421e-06, "loss": 0.5959, "step": 13030 }, { "epoch": 0.59, "learning_rate": 8.161060420354987e-06, "loss": 0.6248, "step": 13040 }, { "epoch": 0.59, "learning_rate": 8.151981478959554e-06, "loss": 0.7281, "step": 13050 }, { "epoch": 0.59, "learning_rate": 8.14290253756412e-06, "loss": 0.7008, "step": 13060 }, { "epoch": 0.59, "learning_rate": 8.133823596168688e-06, "loss": 0.6934, "step": 13070 }, { "epoch": 0.59, "learning_rate": 8.124744654773254e-06, "loss": 0.7391, "step": 13080 }, { "epoch": 0.59, "learning_rate": 8.115665713377821e-06, "loss": 0.6283, "step": 13090 }, { "epoch": 0.59, "learning_rate": 8.106586771982387e-06, "loss": 0.634, "step": 13100 }, { "epoch": 0.6, "learning_rate": 8.097507830586955e-06, "loss": 0.6631, "step": 13110 }, { "epoch": 0.6, "learning_rate": 8.08842888919152e-06, "loss": 0.6844, "step": 13120 }, { "epoch": 0.6, "learning_rate": 8.079349947796088e-06, "loss": 0.5783, "step": 13130 }, { "epoch": 0.6, "learning_rate": 8.070271006400654e-06, "loss": 0.657, "step": 13140 }, { "epoch": 0.6, "learning_rate": 8.061192065005221e-06, "loss": 0.7016, "step": 13150 }, { "epoch": 0.6, "learning_rate": 8.052113123609789e-06, "loss": 0.684, "step": 13160 }, { "epoch": 0.6, "learning_rate": 8.043034182214355e-06, "loss": 0.6338, "step": 13170 }, { "epoch": 0.6, "learning_rate": 8.03395524081892e-06, "loss": 0.6326, "step": 13180 }, { "epoch": 0.6, "learning_rate": 8.024876299423488e-06, "loss": 0.6875, "step": 13190 }, { "epoch": 0.6, "learning_rate": 8.015797358028054e-06, "loss": 0.6289, "step": 13200 }, { "epoch": 0.6, "learning_rate": 8.006718416632622e-06, "loss": 0.7225, "step": 13210 }, { "epoch": 0.6, "learning_rate": 7.997639475237187e-06, "loss": 0.7109, "step": 13220 }, { "epoch": 0.6, "learning_rate": 7.988560533841755e-06, "loss": 0.6988, "step": 13230 }, { "epoch": 0.6, "learning_rate": 7.979481592446321e-06, "loss": 0.6207, "step": 13240 }, { "epoch": 0.6, "learning_rate": 7.970402651050888e-06, "loss": 0.6271, "step": 13250 }, { "epoch": 0.6, "learning_rate": 7.961323709655456e-06, "loss": 0.6178, "step": 13260 }, { "epoch": 0.6, "learning_rate": 7.952244768260022e-06, "loss": 0.6682, "step": 13270 }, { "epoch": 0.6, "learning_rate": 7.943165826864588e-06, "loss": 0.6662, "step": 13280 }, { "epoch": 0.6, "learning_rate": 7.934086885469155e-06, "loss": 0.7354, "step": 13290 }, { "epoch": 0.6, "learning_rate": 7.925007944073721e-06, "loss": 0.6856, "step": 13300 }, { "epoch": 0.6, "learning_rate": 7.915929002678289e-06, "loss": 0.6242, "step": 13310 }, { "epoch": 0.6, "learning_rate": 7.906850061282854e-06, "loss": 0.6432, "step": 13320 }, { "epoch": 0.61, "learning_rate": 7.897771119887422e-06, "loss": 0.6941, "step": 13330 }, { "epoch": 0.61, "learning_rate": 7.88869217849199e-06, "loss": 0.7207, "step": 13340 }, { "epoch": 0.61, "learning_rate": 7.879613237096555e-06, "loss": 0.6047, "step": 13350 }, { "epoch": 0.61, "learning_rate": 7.870534295701123e-06, "loss": 0.6664, "step": 13360 }, { "epoch": 0.61, "learning_rate": 7.861455354305689e-06, "loss": 0.5578, "step": 13370 }, { "epoch": 0.61, "learning_rate": 7.852376412910255e-06, "loss": 0.6244, "step": 13380 }, { "epoch": 0.61, "learning_rate": 7.843297471514822e-06, "loss": 0.7566, "step": 13390 }, { "epoch": 0.61, "learning_rate": 7.834218530119388e-06, "loss": 0.666, "step": 13400 }, { "epoch": 0.61, "learning_rate": 7.825139588723956e-06, "loss": 0.5805, "step": 13410 }, { "epoch": 0.61, "learning_rate": 7.816060647328521e-06, "loss": 0.6395, "step": 13420 }, { "epoch": 0.61, "learning_rate": 7.806981705933089e-06, "loss": 0.7125, "step": 13430 }, { "epoch": 0.61, "learning_rate": 7.797902764537656e-06, "loss": 0.702, "step": 13440 }, { "epoch": 0.61, "learning_rate": 7.788823823142222e-06, "loss": 0.6018, "step": 13450 }, { "epoch": 0.61, "learning_rate": 7.77974488174679e-06, "loss": 0.7211, "step": 13460 }, { "epoch": 0.61, "learning_rate": 7.770665940351356e-06, "loss": 0.6887, "step": 13470 }, { "epoch": 0.61, "learning_rate": 7.761586998955922e-06, "loss": 0.666, "step": 13480 }, { "epoch": 0.61, "learning_rate": 7.752508057560489e-06, "loss": 0.6154, "step": 13490 }, { "epoch": 0.61, "learning_rate": 7.743429116165055e-06, "loss": 0.7082, "step": 13500 }, { "epoch": 0.61, "eval_accuracy": 0.6262675036214389, "eval_loss": 0.66704922914505, "eval_runtime": 72.8876, "eval_samples_per_second": 56.827, "eval_steps_per_second": 14.214, "step": 13500 }, { "epoch": 0.61, "learning_rate": 7.734350174769623e-06, "loss": 0.6029, "step": 13510 }, { "epoch": 0.61, "learning_rate": 7.72527123337419e-06, "loss": 0.6395, "step": 13520 }, { "epoch": 0.61, "learning_rate": 7.716192291978756e-06, "loss": 0.6648, "step": 13530 }, { "epoch": 0.61, "learning_rate": 7.707113350583323e-06, "loss": 0.7348, "step": 13540 }, { "epoch": 0.62, "learning_rate": 7.69803440918789e-06, "loss": 0.583, "step": 13550 }, { "epoch": 0.62, "learning_rate": 7.688955467792457e-06, "loss": 0.707, "step": 13560 }, { "epoch": 0.62, "learning_rate": 7.679876526397023e-06, "loss": 0.625, "step": 13570 }, { "epoch": 0.62, "learning_rate": 7.670797585001589e-06, "loss": 0.6125, "step": 13580 }, { "epoch": 0.62, "learning_rate": 7.661718643606156e-06, "loss": 0.5121, "step": 13590 }, { "epoch": 0.62, "learning_rate": 7.652639702210722e-06, "loss": 0.7041, "step": 13600 }, { "epoch": 0.62, "learning_rate": 7.64356076081529e-06, "loss": 0.6555, "step": 13610 }, { "epoch": 0.62, "learning_rate": 7.634481819419857e-06, "loss": 0.5404, "step": 13620 }, { "epoch": 0.62, "learning_rate": 7.625402878024423e-06, "loss": 0.7129, "step": 13630 }, { "epoch": 0.62, "learning_rate": 7.6163239366289896e-06, "loss": 0.7699, "step": 13640 }, { "epoch": 0.62, "learning_rate": 7.607244995233556e-06, "loss": 0.6877, "step": 13650 }, { "epoch": 0.62, "learning_rate": 7.598166053838123e-06, "loss": 0.735, "step": 13660 }, { "epoch": 0.62, "learning_rate": 7.5890871124426905e-06, "loss": 0.7822, "step": 13670 }, { "epoch": 0.62, "learning_rate": 7.580008171047256e-06, "loss": 0.6674, "step": 13680 }, { "epoch": 0.62, "learning_rate": 7.570929229651823e-06, "loss": 0.6441, "step": 13690 }, { "epoch": 0.62, "learning_rate": 7.561850288256391e-06, "loss": 0.643, "step": 13700 }, { "epoch": 0.62, "learning_rate": 7.5527713468609564e-06, "loss": 0.6641, "step": 13710 }, { "epoch": 0.62, "learning_rate": 7.543692405465524e-06, "loss": 0.6915, "step": 13720 }, { "epoch": 0.62, "learning_rate": 7.53461346407009e-06, "loss": 0.7867, "step": 13730 }, { "epoch": 0.62, "learning_rate": 7.5255345226746565e-06, "loss": 0.7236, "step": 13740 }, { "epoch": 0.62, "learning_rate": 7.516455581279224e-06, "loss": 0.7305, "step": 13750 }, { "epoch": 0.62, "learning_rate": 7.50737663988379e-06, "loss": 0.6984, "step": 13760 }, { "epoch": 0.63, "learning_rate": 7.4982976984883575e-06, "loss": 0.7041, "step": 13770 }, { "epoch": 0.63, "learning_rate": 7.489218757092923e-06, "loss": 0.6264, "step": 13780 }, { "epoch": 0.63, "learning_rate": 7.48013981569749e-06, "loss": 0.6533, "step": 13790 }, { "epoch": 0.63, "learning_rate": 7.4710608743020575e-06, "loss": 0.7535, "step": 13800 }, { "epoch": 0.63, "learning_rate": 7.461981932906623e-06, "loss": 0.7496, "step": 13810 }, { "epoch": 0.63, "learning_rate": 7.452902991511191e-06, "loss": 0.7844, "step": 13820 }, { "epoch": 0.63, "learning_rate": 7.443824050115757e-06, "loss": 0.7195, "step": 13830 }, { "epoch": 0.63, "learning_rate": 7.4347451087203235e-06, "loss": 0.7344, "step": 13840 }, { "epoch": 0.63, "learning_rate": 7.425666167324891e-06, "loss": 0.623, "step": 13850 }, { "epoch": 0.63, "learning_rate": 7.416587225929457e-06, "loss": 0.6908, "step": 13860 }, { "epoch": 0.63, "learning_rate": 7.407508284534024e-06, "loss": 0.6525, "step": 13870 }, { "epoch": 0.63, "learning_rate": 7.398429343138591e-06, "loss": 0.6301, "step": 13880 }, { "epoch": 0.63, "learning_rate": 7.389350401743157e-06, "loss": 0.7547, "step": 13890 }, { "epoch": 0.63, "learning_rate": 7.3802714603477245e-06, "loss": 0.6453, "step": 13900 }, { "epoch": 0.63, "learning_rate": 7.37119251895229e-06, "loss": 0.8059, "step": 13910 }, { "epoch": 0.63, "learning_rate": 7.362113577556858e-06, "loss": 0.6408, "step": 13920 }, { "epoch": 0.63, "learning_rate": 7.353034636161425e-06, "loss": 0.6986, "step": 13930 }, { "epoch": 0.63, "learning_rate": 7.3439556947659904e-06, "loss": 0.5945, "step": 13940 }, { "epoch": 0.63, "learning_rate": 7.334876753370558e-06, "loss": 0.6809, "step": 13950 }, { "epoch": 0.63, "learning_rate": 7.325797811975124e-06, "loss": 0.7059, "step": 13960 }, { "epoch": 0.63, "learning_rate": 7.316718870579691e-06, "loss": 0.6023, "step": 13970 }, { "epoch": 0.63, "learning_rate": 7.307639929184258e-06, "loss": 0.5889, "step": 13980 }, { "epoch": 0.64, "learning_rate": 7.298560987788824e-06, "loss": 0.6645, "step": 13990 }, { "epoch": 0.64, "learning_rate": 7.2894820463933915e-06, "loss": 0.5768, "step": 14000 }, { "epoch": 0.64, "eval_accuracy": 0.6269917914051183, "eval_loss": 0.6619028449058533, "eval_runtime": 73.4725, "eval_samples_per_second": 56.375, "eval_steps_per_second": 14.101, "step": 14000 }, { "epoch": 0.64, "learning_rate": 7.280403104997957e-06, "loss": 0.6783, "step": 14010 }, { "epoch": 0.64, "learning_rate": 7.271324163602525e-06, "loss": 0.59, "step": 14020 }, { "epoch": 0.64, "learning_rate": 7.2622452222070916e-06, "loss": 0.682, "step": 14030 }, { "epoch": 0.64, "learning_rate": 7.253166280811657e-06, "loss": 0.6873, "step": 14040 }, { "epoch": 0.64, "learning_rate": 7.244087339416225e-06, "loss": 0.7082, "step": 14050 }, { "epoch": 0.64, "learning_rate": 7.235008398020792e-06, "loss": 0.8027, "step": 14060 }, { "epoch": 0.64, "learning_rate": 7.225929456625358e-06, "loss": 0.6777, "step": 14070 }, { "epoch": 0.64, "learning_rate": 7.216850515229925e-06, "loss": 0.6312, "step": 14080 }, { "epoch": 0.64, "learning_rate": 7.207771573834491e-06, "loss": 0.6381, "step": 14090 }, { "epoch": 0.64, "learning_rate": 7.1986926324390584e-06, "loss": 0.5955, "step": 14100 }, { "epoch": 0.64, "learning_rate": 7.189613691043625e-06, "loss": 0.6518, "step": 14110 }, { "epoch": 0.64, "learning_rate": 7.180534749648192e-06, "loss": 0.5652, "step": 14120 }, { "epoch": 0.64, "learning_rate": 7.1714558082527585e-06, "loss": 0.7027, "step": 14130 }, { "epoch": 0.64, "learning_rate": 7.162376866857324e-06, "loss": 0.615, "step": 14140 }, { "epoch": 0.64, "learning_rate": 7.153297925461892e-06, "loss": 0.568, "step": 14150 }, { "epoch": 0.64, "learning_rate": 7.144218984066459e-06, "loss": 0.6701, "step": 14160 }, { "epoch": 0.64, "learning_rate": 7.135140042671025e-06, "loss": 0.7063, "step": 14170 }, { "epoch": 0.64, "learning_rate": 7.126061101275592e-06, "loss": 0.6428, "step": 14180 }, { "epoch": 0.64, "learning_rate": 7.116982159880158e-06, "loss": 0.6539, "step": 14190 }, { "epoch": 0.64, "learning_rate": 7.107903218484725e-06, "loss": 0.7867, "step": 14200 }, { "epoch": 0.65, "learning_rate": 7.098824277089292e-06, "loss": 0.6078, "step": 14210 }, { "epoch": 0.65, "learning_rate": 7.089745335693859e-06, "loss": 0.5559, "step": 14220 }, { "epoch": 0.65, "learning_rate": 7.0806663942984255e-06, "loss": 0.5851, "step": 14230 }, { "epoch": 0.65, "learning_rate": 7.071587452902993e-06, "loss": 0.5799, "step": 14240 }, { "epoch": 0.65, "learning_rate": 7.062508511507559e-06, "loss": 0.5656, "step": 14250 }, { "epoch": 0.65, "learning_rate": 7.0534295701121256e-06, "loss": 0.5715, "step": 14260 }, { "epoch": 0.65, "learning_rate": 7.044350628716692e-06, "loss": 0.7535, "step": 14270 }, { "epoch": 0.65, "learning_rate": 7.035271687321259e-06, "loss": 0.607, "step": 14280 }, { "epoch": 0.65, "learning_rate": 7.0261927459258265e-06, "loss": 0.6488, "step": 14290 }, { "epoch": 0.65, "learning_rate": 7.017113804530392e-06, "loss": 0.5721, "step": 14300 }, { "epoch": 0.65, "learning_rate": 7.008034863134959e-06, "loss": 0.7064, "step": 14310 }, { "epoch": 0.65, "learning_rate": 6.998955921739526e-06, "loss": 0.5836, "step": 14320 }, { "epoch": 0.65, "learning_rate": 6.9898769803440924e-06, "loss": 0.624, "step": 14330 }, { "epoch": 0.65, "learning_rate": 6.98079803894866e-06, "loss": 0.5609, "step": 14340 }, { "epoch": 0.65, "learning_rate": 6.971719097553226e-06, "loss": 0.6859, "step": 14350 }, { "epoch": 0.65, "learning_rate": 6.9626401561577925e-06, "loss": 0.652, "step": 14360 }, { "epoch": 0.65, "learning_rate": 6.953561214762358e-06, "loss": 0.8596, "step": 14370 }, { "epoch": 0.65, "learning_rate": 6.944482273366926e-06, "loss": 0.641, "step": 14380 }, { "epoch": 0.65, "learning_rate": 6.9354033319714935e-06, "loss": 0.7641, "step": 14390 }, { "epoch": 0.65, "learning_rate": 6.926324390576059e-06, "loss": 0.7662, "step": 14400 }, { "epoch": 0.65, "learning_rate": 6.917245449180626e-06, "loss": 0.698, "step": 14410 }, { "epoch": 0.65, "learning_rate": 6.908166507785192e-06, "loss": 0.6398, "step": 14420 }, { "epoch": 0.66, "learning_rate": 6.899087566389759e-06, "loss": 0.7016, "step": 14430 }, { "epoch": 0.66, "learning_rate": 6.890008624994327e-06, "loss": 0.6867, "step": 14440 }, { "epoch": 0.66, "learning_rate": 6.880929683598893e-06, "loss": 0.6895, "step": 14450 }, { "epoch": 0.66, "learning_rate": 6.8718507422034595e-06, "loss": 0.6836, "step": 14460 }, { "epoch": 0.66, "learning_rate": 6.862771800808027e-06, "loss": 0.6588, "step": 14470 }, { "epoch": 0.66, "learning_rate": 6.853692859412593e-06, "loss": 0.7346, "step": 14480 }, { "epoch": 0.66, "learning_rate": 6.84461391801716e-06, "loss": 0.6236, "step": 14490 }, { "epoch": 0.66, "learning_rate": 6.835534976621726e-06, "loss": 0.6082, "step": 14500 }, { "epoch": 0.66, "eval_accuracy": 0.6154031868662482, "eval_loss": 0.6647462248802185, "eval_runtime": 73.4637, "eval_samples_per_second": 56.382, "eval_steps_per_second": 14.102, "step": 14500 }, { "epoch": 0.66, "learning_rate": 6.826456035226293e-06, "loss": 0.7277, "step": 14510 }, { "epoch": 0.66, "learning_rate": 6.8173770938308605e-06, "loss": 0.7398, "step": 14520 }, { "epoch": 0.66, "learning_rate": 6.808298152435426e-06, "loss": 0.702, "step": 14530 }, { "epoch": 0.66, "learning_rate": 6.799219211039994e-06, "loss": 0.6396, "step": 14540 }, { "epoch": 0.66, "learning_rate": 6.79014026964456e-06, "loss": 0.6211, "step": 14550 }, { "epoch": 0.66, "learning_rate": 6.7810613282491264e-06, "loss": 0.716, "step": 14560 }, { "epoch": 0.66, "learning_rate": 6.771982386853694e-06, "loss": 0.6299, "step": 14570 }, { "epoch": 0.66, "learning_rate": 6.76290344545826e-06, "loss": 0.6096, "step": 14580 }, { "epoch": 0.66, "learning_rate": 6.753824504062827e-06, "loss": 0.6121, "step": 14590 }, { "epoch": 0.66, "learning_rate": 6.744745562667393e-06, "loss": 0.65, "step": 14600 }, { "epoch": 0.66, "learning_rate": 6.73566662127196e-06, "loss": 0.7004, "step": 14610 }, { "epoch": 0.66, "learning_rate": 6.7265876798765275e-06, "loss": 0.5912, "step": 14620 }, { "epoch": 0.66, "learning_rate": 6.717508738481093e-06, "loss": 0.6816, "step": 14630 }, { "epoch": 0.66, "learning_rate": 6.708429797085661e-06, "loss": 0.7867, "step": 14640 }, { "epoch": 0.67, "learning_rate": 6.6993508556902276e-06, "loss": 0.6977, "step": 14650 }, { "epoch": 0.67, "learning_rate": 6.690271914294793e-06, "loss": 0.7098, "step": 14660 }, { "epoch": 0.67, "learning_rate": 6.681192972899361e-06, "loss": 0.749, "step": 14670 }, { "epoch": 0.67, "learning_rate": 6.672114031503927e-06, "loss": 0.5707, "step": 14680 }, { "epoch": 0.67, "learning_rate": 6.663035090108494e-06, "loss": 0.7465, "step": 14690 }, { "epoch": 0.67, "learning_rate": 6.653956148713061e-06, "loss": 0.6639, "step": 14700 }, { "epoch": 0.67, "learning_rate": 6.644877207317627e-06, "loss": 0.7758, "step": 14710 }, { "epoch": 0.67, "learning_rate": 6.6357982659221944e-06, "loss": 0.6439, "step": 14720 }, { "epoch": 0.67, "learning_rate": 6.62671932452676e-06, "loss": 0.5809, "step": 14730 }, { "epoch": 0.67, "learning_rate": 6.617640383131327e-06, "loss": 0.6887, "step": 14740 }, { "epoch": 0.67, "learning_rate": 6.6085614417358945e-06, "loss": 0.6861, "step": 14750 }, { "epoch": 0.67, "learning_rate": 6.59948250034046e-06, "loss": 0.6762, "step": 14760 }, { "epoch": 0.67, "learning_rate": 6.590403558945028e-06, "loss": 0.698, "step": 14770 }, { "epoch": 0.67, "learning_rate": 6.581324617549594e-06, "loss": 0.7, "step": 14780 }, { "epoch": 0.67, "learning_rate": 6.5722456761541605e-06, "loss": 0.652, "step": 14790 }, { "epoch": 0.67, "learning_rate": 6.563166734758728e-06, "loss": 0.6787, "step": 14800 }, { "epoch": 0.67, "learning_rate": 6.554087793363294e-06, "loss": 0.6215, "step": 14810 }, { "epoch": 0.67, "learning_rate": 6.545008851967861e-06, "loss": 0.8277, "step": 14820 }, { "epoch": 0.67, "learning_rate": 6.535929910572428e-06, "loss": 0.6615, "step": 14830 }, { "epoch": 0.67, "learning_rate": 6.526850969176994e-06, "loss": 0.6617, "step": 14840 }, { "epoch": 0.67, "learning_rate": 6.5177720277815615e-06, "loss": 0.6766, "step": 14850 }, { "epoch": 0.67, "learning_rate": 6.508693086386127e-06, "loss": 0.7777, "step": 14860 }, { "epoch": 0.68, "learning_rate": 6.499614144990695e-06, "loss": 0.7141, "step": 14870 }, { "epoch": 0.68, "learning_rate": 6.4905352035952616e-06, "loss": 0.6637, "step": 14880 }, { "epoch": 0.68, "learning_rate": 6.481456262199827e-06, "loss": 0.6723, "step": 14890 }, { "epoch": 0.68, "learning_rate": 6.472377320804395e-06, "loss": 0.6975, "step": 14900 }, { "epoch": 0.68, "learning_rate": 6.463298379408961e-06, "loss": 0.7087, "step": 14910 }, { "epoch": 0.68, "learning_rate": 6.454219438013528e-06, "loss": 0.6736, "step": 14920 }, { "epoch": 0.68, "learning_rate": 6.445140496618095e-06, "loss": 0.6164, "step": 14930 }, { "epoch": 0.68, "learning_rate": 6.436061555222661e-06, "loss": 0.6373, "step": 14940 }, { "epoch": 0.68, "learning_rate": 6.4269826138272284e-06, "loss": 0.707, "step": 14950 }, { "epoch": 0.68, "learning_rate": 6.417903672431794e-06, "loss": 0.6229, "step": 14960 }, { "epoch": 0.68, "learning_rate": 6.408824731036362e-06, "loss": 0.734, "step": 14970 }, { "epoch": 0.68, "learning_rate": 6.3997457896409285e-06, "loss": 0.6488, "step": 14980 }, { "epoch": 0.68, "learning_rate": 6.390666848245494e-06, "loss": 0.6781, "step": 14990 }, { "epoch": 0.68, "learning_rate": 6.381587906850062e-06, "loss": 0.616, "step": 15000 }, { "epoch": 0.68, "eval_accuracy": 0.623128923225495, "eval_loss": 0.6619547009468079, "eval_runtime": 73.1853, "eval_samples_per_second": 56.596, "eval_steps_per_second": 14.156, "step": 15000 }, { "epoch": 0.68, "learning_rate": 6.3725089654546295e-06, "loss": 0.6363, "step": 15010 }, { "epoch": 0.68, "learning_rate": 6.363430024059195e-06, "loss": 0.7072, "step": 15020 }, { "epoch": 0.68, "learning_rate": 6.354351082663762e-06, "loss": 0.6193, "step": 15030 }, { "epoch": 0.68, "learning_rate": 6.345272141268328e-06, "loss": 0.6348, "step": 15040 }, { "epoch": 0.68, "learning_rate": 6.336193199872895e-06, "loss": 0.6953, "step": 15050 }, { "epoch": 0.68, "learning_rate": 6.327114258477463e-06, "loss": 0.5598, "step": 15060 }, { "epoch": 0.68, "learning_rate": 6.318035317082029e-06, "loss": 0.6547, "step": 15070 }, { "epoch": 0.68, "learning_rate": 6.3089563756865955e-06, "loss": 0.6287, "step": 15080 }, { "epoch": 0.69, "learning_rate": 6.299877434291161e-06, "loss": 0.5846, "step": 15090 }, { "epoch": 0.69, "learning_rate": 6.290798492895729e-06, "loss": 0.7217, "step": 15100 }, { "epoch": 0.69, "learning_rate": 6.281719551500296e-06, "loss": 0.7309, "step": 15110 }, { "epoch": 0.69, "learning_rate": 6.272640610104862e-06, "loss": 0.6762, "step": 15120 }, { "epoch": 0.69, "learning_rate": 6.263561668709429e-06, "loss": 0.7738, "step": 15130 }, { "epoch": 0.69, "learning_rate": 6.254482727313995e-06, "loss": 0.6906, "step": 15140 }, { "epoch": 0.69, "learning_rate": 6.245403785918562e-06, "loss": 0.6582, "step": 15150 }, { "epoch": 0.69, "learning_rate": 6.236324844523129e-06, "loss": 0.6228, "step": 15160 }, { "epoch": 0.69, "learning_rate": 6.227245903127696e-06, "loss": 0.7645, "step": 15170 }, { "epoch": 0.69, "learning_rate": 6.2181669617322625e-06, "loss": 0.6684, "step": 15180 }, { "epoch": 0.69, "learning_rate": 6.20908802033683e-06, "loss": 0.7598, "step": 15190 }, { "epoch": 0.69, "learning_rate": 6.200009078941396e-06, "loss": 0.6254, "step": 15200 }, { "epoch": 0.69, "learning_rate": 6.1909301375459625e-06, "loss": 0.6281, "step": 15210 }, { "epoch": 0.69, "learning_rate": 6.181851196150529e-06, "loss": 0.6301, "step": 15220 }, { "epoch": 0.69, "learning_rate": 6.172772254755096e-06, "loss": 0.6805, "step": 15230 }, { "epoch": 0.69, "learning_rate": 6.1636933133596635e-06, "loss": 0.7369, "step": 15240 }, { "epoch": 0.69, "learning_rate": 6.154614371964229e-06, "loss": 0.6443, "step": 15250 }, { "epoch": 0.69, "learning_rate": 6.145535430568796e-06, "loss": 0.6247, "step": 15260 }, { "epoch": 0.69, "learning_rate": 6.136456489173363e-06, "loss": 0.6518, "step": 15270 }, { "epoch": 0.69, "learning_rate": 6.127377547777929e-06, "loss": 0.6061, "step": 15280 }, { "epoch": 0.69, "learning_rate": 6.118298606382497e-06, "loss": 0.6303, "step": 15290 }, { "epoch": 0.69, "learning_rate": 6.109219664987063e-06, "loss": 0.5945, "step": 15300 }, { "epoch": 0.69, "learning_rate": 6.1001407235916295e-06, "loss": 0.5559, "step": 15310 }, { "epoch": 0.7, "learning_rate": 6.091061782196196e-06, "loss": 0.6348, "step": 15320 }, { "epoch": 0.7, "learning_rate": 6.081982840800763e-06, "loss": 0.5775, "step": 15330 }, { "epoch": 0.7, "learning_rate": 6.0729038994053304e-06, "loss": 0.6215, "step": 15340 }, { "epoch": 0.7, "learning_rate": 6.063824958009896e-06, "loss": 0.641, "step": 15350 }, { "epoch": 0.7, "learning_rate": 6.054746016614463e-06, "loss": 0.6947, "step": 15360 }, { "epoch": 0.7, "learning_rate": 6.04566707521903e-06, "loss": 0.698, "step": 15370 }, { "epoch": 0.7, "learning_rate": 6.036588133823596e-06, "loss": 0.7543, "step": 15380 }, { "epoch": 0.7, "learning_rate": 6.027509192428164e-06, "loss": 0.7227, "step": 15390 }, { "epoch": 0.7, "learning_rate": 6.01843025103273e-06, "loss": 0.6172, "step": 15400 }, { "epoch": 0.7, "learning_rate": 6.0093513096372965e-06, "loss": 0.7646, "step": 15410 }, { "epoch": 0.7, "learning_rate": 6.000272368241864e-06, "loss": 0.7711, "step": 15420 }, { "epoch": 0.7, "learning_rate": 5.99119342684643e-06, "loss": 0.6197, "step": 15430 }, { "epoch": 0.7, "learning_rate": 5.982114485450997e-06, "loss": 0.6904, "step": 15440 }, { "epoch": 0.7, "learning_rate": 5.973035544055563e-06, "loss": 0.7703, "step": 15450 }, { "epoch": 0.7, "learning_rate": 5.96395660266013e-06, "loss": 0.607, "step": 15460 }, { "epoch": 0.7, "learning_rate": 5.9548776612646975e-06, "loss": 0.877, "step": 15470 }, { "epoch": 0.7, "learning_rate": 5.945798719869263e-06, "loss": 0.6273, "step": 15480 }, { "epoch": 0.7, "learning_rate": 5.936719778473831e-06, "loss": 0.7051, "step": 15490 }, { "epoch": 0.7, "learning_rate": 5.927640837078397e-06, "loss": 0.7094, "step": 15500 }, { "epoch": 0.7, "eval_accuracy": 0.6284403669724771, "eval_loss": 0.6594442129135132, "eval_runtime": 73.777, "eval_samples_per_second": 56.142, "eval_steps_per_second": 14.042, "step": 15500 }, { "epoch": 0.7, "learning_rate": 5.9185618956829634e-06, "loss": 0.6646, "step": 15510 }, { "epoch": 0.7, "learning_rate": 5.909482954287531e-06, "loss": 0.6553, "step": 15520 }, { "epoch": 0.7, "learning_rate": 5.900404012892097e-06, "loss": 0.7227, "step": 15530 }, { "epoch": 0.71, "learning_rate": 5.891325071496664e-06, "loss": 0.6779, "step": 15540 }, { "epoch": 0.71, "learning_rate": 5.88224613010123e-06, "loss": 0.6443, "step": 15550 }, { "epoch": 0.71, "learning_rate": 5.873167188705797e-06, "loss": 0.7105, "step": 15560 }, { "epoch": 0.71, "learning_rate": 5.8640882473103644e-06, "loss": 0.6504, "step": 15570 }, { "epoch": 0.71, "learning_rate": 5.85500930591493e-06, "loss": 0.5859, "step": 15580 }, { "epoch": 0.71, "learning_rate": 5.845930364519498e-06, "loss": 0.5393, "step": 15590 }, { "epoch": 0.71, "learning_rate": 5.8368514231240645e-06, "loss": 0.6582, "step": 15600 }, { "epoch": 0.71, "learning_rate": 5.82777248172863e-06, "loss": 0.6428, "step": 15610 }, { "epoch": 0.71, "learning_rate": 5.818693540333198e-06, "loss": 0.6365, "step": 15620 }, { "epoch": 0.71, "learning_rate": 5.809614598937764e-06, "loss": 0.6689, "step": 15630 }, { "epoch": 0.71, "learning_rate": 5.800535657542331e-06, "loss": 0.6746, "step": 15640 }, { "epoch": 0.71, "learning_rate": 5.791456716146898e-06, "loss": 0.7037, "step": 15650 }, { "epoch": 0.71, "learning_rate": 5.782377774751464e-06, "loss": 0.6582, "step": 15660 }, { "epoch": 0.71, "learning_rate": 5.773298833356031e-06, "loss": 0.5789, "step": 15670 }, { "epoch": 0.71, "learning_rate": 5.764219891960597e-06, "loss": 0.6115, "step": 15680 }, { "epoch": 0.71, "learning_rate": 5.755140950565165e-06, "loss": 0.6639, "step": 15690 }, { "epoch": 0.71, "learning_rate": 5.7460620091697315e-06, "loss": 0.7234, "step": 15700 }, { "epoch": 0.71, "learning_rate": 5.736983067774297e-06, "loss": 0.6412, "step": 15710 }, { "epoch": 0.71, "learning_rate": 5.727904126378865e-06, "loss": 0.6016, "step": 15720 }, { "epoch": 0.71, "learning_rate": 5.718825184983431e-06, "loss": 0.6486, "step": 15730 }, { "epoch": 0.71, "learning_rate": 5.709746243587998e-06, "loss": 0.6322, "step": 15740 }, { "epoch": 0.71, "learning_rate": 5.700667302192565e-06, "loss": 0.649, "step": 15750 }, { "epoch": 0.72, "learning_rate": 5.691588360797131e-06, "loss": 0.65, "step": 15760 }, { "epoch": 0.72, "learning_rate": 5.682509419401698e-06, "loss": 0.6649, "step": 15770 }, { "epoch": 0.72, "learning_rate": 5.673430478006265e-06, "loss": 0.6457, "step": 15780 }, { "epoch": 0.72, "learning_rate": 5.664351536610832e-06, "loss": 0.6773, "step": 15790 }, { "epoch": 0.72, "learning_rate": 5.6552725952153985e-06, "loss": 0.5795, "step": 15800 }, { "epoch": 0.72, "learning_rate": 5.646193653819964e-06, "loss": 0.5668, "step": 15810 }, { "epoch": 0.72, "learning_rate": 5.637114712424532e-06, "loss": 0.5938, "step": 15820 }, { "epoch": 0.72, "learning_rate": 5.6280357710290985e-06, "loss": 0.7176, "step": 15830 }, { "epoch": 0.72, "learning_rate": 5.618956829633665e-06, "loss": 0.6992, "step": 15840 }, { "epoch": 0.72, "learning_rate": 5.609877888238232e-06, "loss": 0.6344, "step": 15850 }, { "epoch": 0.72, "learning_rate": 5.600798946842798e-06, "loss": 0.5469, "step": 15860 }, { "epoch": 0.72, "learning_rate": 5.591720005447365e-06, "loss": 0.6225, "step": 15870 }, { "epoch": 0.72, "learning_rate": 5.582641064051932e-06, "loss": 0.6426, "step": 15880 }, { "epoch": 0.72, "learning_rate": 5.573562122656499e-06, "loss": 0.6982, "step": 15890 }, { "epoch": 0.72, "learning_rate": 5.564483181261065e-06, "loss": 0.6342, "step": 15900 }, { "epoch": 0.72, "learning_rate": 5.555404239865631e-06, "loss": 0.6494, "step": 15910 }, { "epoch": 0.72, "learning_rate": 5.546325298470199e-06, "loss": 0.8711, "step": 15920 }, { "epoch": 0.72, "learning_rate": 5.5372463570747655e-06, "loss": 0.6488, "step": 15930 }, { "epoch": 0.72, "learning_rate": 5.528167415679332e-06, "loss": 0.6447, "step": 15940 }, { "epoch": 0.72, "learning_rate": 5.519088474283899e-06, "loss": 0.6604, "step": 15950 }, { "epoch": 0.72, "learning_rate": 5.5100095328884664e-06, "loss": 0.6629, "step": 15960 }, { "epoch": 0.72, "learning_rate": 5.500930591493032e-06, "loss": 0.7656, "step": 15970 }, { "epoch": 0.73, "learning_rate": 5.491851650097599e-06, "loss": 0.6625, "step": 15980 }, { "epoch": 0.73, "learning_rate": 5.482772708702166e-06, "loss": 0.6834, "step": 15990 }, { "epoch": 0.73, "learning_rate": 5.473693767306732e-06, "loss": 0.5994, "step": 16000 }, { "epoch": 0.73, "eval_accuracy": 0.6253017865765331, "eval_loss": 0.6599798798561096, "eval_runtime": 73.4934, "eval_samples_per_second": 56.359, "eval_steps_per_second": 14.097, "step": 16000 }, { "epoch": 0.73, "learning_rate": 5.4646148259113e-06, "loss": 0.4986, "step": 16010 }, { "epoch": 0.73, "learning_rate": 5.455535884515866e-06, "loss": 0.6568, "step": 16020 }, { "epoch": 0.73, "learning_rate": 5.4464569431204325e-06, "loss": 0.7852, "step": 16030 }, { "epoch": 0.73, "learning_rate": 5.437378001724999e-06, "loss": 0.6438, "step": 16040 }, { "epoch": 0.73, "learning_rate": 5.428299060329566e-06, "loss": 0.6207, "step": 16050 }, { "epoch": 0.73, "learning_rate": 5.419220118934133e-06, "loss": 0.7109, "step": 16060 }, { "epoch": 0.73, "learning_rate": 5.410141177538699e-06, "loss": 0.5861, "step": 16070 }, { "epoch": 0.73, "learning_rate": 5.401062236143266e-06, "loss": 0.698, "step": 16080 }, { "epoch": 0.73, "learning_rate": 5.391983294747833e-06, "loss": 0.6994, "step": 16090 }, { "epoch": 0.73, "learning_rate": 5.382904353352399e-06, "loss": 0.7209, "step": 16100 }, { "epoch": 0.73, "learning_rate": 5.373825411956967e-06, "loss": 0.6584, "step": 16110 }, { "epoch": 0.73, "learning_rate": 5.364746470561533e-06, "loss": 0.6723, "step": 16120 }, { "epoch": 0.73, "learning_rate": 5.3556675291660994e-06, "loss": 0.609, "step": 16130 }, { "epoch": 0.73, "learning_rate": 5.346588587770667e-06, "loss": 0.6607, "step": 16140 }, { "epoch": 0.73, "learning_rate": 5.337509646375233e-06, "loss": 0.6723, "step": 16150 }, { "epoch": 0.73, "learning_rate": 5.3284307049798e-06, "loss": 0.6447, "step": 16160 }, { "epoch": 0.73, "learning_rate": 5.319351763584366e-06, "loss": 0.732, "step": 16170 }, { "epoch": 0.73, "learning_rate": 5.310272822188933e-06, "loss": 0.6377, "step": 16180 }, { "epoch": 0.73, "learning_rate": 5.3011938807935005e-06, "loss": 0.6717, "step": 16190 }, { "epoch": 0.74, "learning_rate": 5.292114939398066e-06, "loss": 0.6777, "step": 16200 }, { "epoch": 0.74, "learning_rate": 5.283035998002634e-06, "loss": 0.7375, "step": 16210 }, { "epoch": 0.74, "learning_rate": 5.2739570566072e-06, "loss": 0.6996, "step": 16220 }, { "epoch": 0.74, "learning_rate": 5.264878115211766e-06, "loss": 0.8133, "step": 16230 }, { "epoch": 0.74, "learning_rate": 5.255799173816334e-06, "loss": 0.6363, "step": 16240 }, { "epoch": 0.74, "learning_rate": 5.2467202324209e-06, "loss": 0.7176, "step": 16250 }, { "epoch": 0.74, "learning_rate": 5.237641291025467e-06, "loss": 0.7248, "step": 16260 }, { "epoch": 0.74, "learning_rate": 5.228562349630033e-06, "loss": 0.7535, "step": 16270 }, { "epoch": 0.74, "learning_rate": 5.2194834082346e-06, "loss": 0.6496, "step": 16280 }, { "epoch": 0.74, "learning_rate": 5.210404466839167e-06, "loss": 0.6551, "step": 16290 }, { "epoch": 0.74, "learning_rate": 5.201325525443733e-06, "loss": 0.7176, "step": 16300 }, { "epoch": 0.74, "learning_rate": 5.192246584048301e-06, "loss": 0.7488, "step": 16310 }, { "epoch": 0.74, "learning_rate": 5.1831676426528675e-06, "loss": 0.5922, "step": 16320 }, { "epoch": 0.74, "learning_rate": 5.174088701257433e-06, "loss": 0.6285, "step": 16330 }, { "epoch": 0.74, "learning_rate": 5.165009759862001e-06, "loss": 0.7195, "step": 16340 }, { "epoch": 0.74, "learning_rate": 5.155930818466567e-06, "loss": 0.5939, "step": 16350 }, { "epoch": 0.74, "learning_rate": 5.146851877071134e-06, "loss": 0.6443, "step": 16360 }, { "epoch": 0.74, "learning_rate": 5.137772935675701e-06, "loss": 0.7504, "step": 16370 }, { "epoch": 0.74, "learning_rate": 5.128693994280267e-06, "loss": 0.5725, "step": 16380 }, { "epoch": 0.74, "learning_rate": 5.119615052884834e-06, "loss": 0.6207, "step": 16390 }, { "epoch": 0.74, "learning_rate": 5.1105361114894e-06, "loss": 0.6525, "step": 16400 }, { "epoch": 0.74, "learning_rate": 5.101457170093968e-06, "loss": 0.6426, "step": 16410 }, { "epoch": 0.75, "learning_rate": 5.0923782286985345e-06, "loss": 0.6492, "step": 16420 }, { "epoch": 0.75, "learning_rate": 5.0832992873031e-06, "loss": 0.6195, "step": 16430 }, { "epoch": 0.75, "learning_rate": 5.074220345907668e-06, "loss": 0.7227, "step": 16440 }, { "epoch": 0.75, "learning_rate": 5.065141404512234e-06, "loss": 0.6459, "step": 16450 }, { "epoch": 0.75, "learning_rate": 5.056062463116801e-06, "loss": 0.7871, "step": 16460 }, { "epoch": 0.75, "learning_rate": 5.046983521721368e-06, "loss": 0.709, "step": 16470 }, { "epoch": 0.75, "learning_rate": 5.037904580325934e-06, "loss": 0.7148, "step": 16480 }, { "epoch": 0.75, "learning_rate": 5.028825638930501e-06, "loss": 0.7053, "step": 16490 }, { "epoch": 0.75, "learning_rate": 5.019746697535067e-06, "loss": 0.6969, "step": 16500 }, { "epoch": 0.75, "eval_accuracy": 0.6332689521970063, "eval_loss": 0.6564537286758423, "eval_runtime": 73.2629, "eval_samples_per_second": 56.536, "eval_steps_per_second": 14.141, "step": 16500 }, { "epoch": 0.75, "learning_rate": 5.010667756139635e-06, "loss": 0.6316, "step": 16510 }, { "epoch": 0.75, "learning_rate": 5.0015888147442014e-06, "loss": 0.649, "step": 16520 }, { "epoch": 0.75, "learning_rate": 4.992509873348768e-06, "loss": 0.6922, "step": 16530 }, { "epoch": 0.75, "learning_rate": 4.983430931953335e-06, "loss": 0.7539, "step": 16540 }, { "epoch": 0.75, "learning_rate": 4.9743519905579015e-06, "loss": 0.7371, "step": 16550 }, { "epoch": 0.75, "learning_rate": 4.965273049162468e-06, "loss": 0.7238, "step": 16560 }, { "epoch": 0.75, "learning_rate": 4.956194107767035e-06, "loss": 0.6398, "step": 16570 }, { "epoch": 0.75, "learning_rate": 4.947115166371602e-06, "loss": 0.6449, "step": 16580 }, { "epoch": 0.75, "learning_rate": 4.938036224976168e-06, "loss": 0.6453, "step": 16590 }, { "epoch": 0.75, "learning_rate": 4.928957283580735e-06, "loss": 0.6664, "step": 16600 }, { "epoch": 0.75, "learning_rate": 4.919878342185302e-06, "loss": 0.6332, "step": 16610 }, { "epoch": 0.75, "learning_rate": 4.910799400789868e-06, "loss": 0.6787, "step": 16620 }, { "epoch": 0.75, "learning_rate": 4.901720459394435e-06, "loss": 0.6451, "step": 16630 }, { "epoch": 0.76, "learning_rate": 4.892641517999002e-06, "loss": 0.6668, "step": 16640 }, { "epoch": 0.76, "learning_rate": 4.8835625766035685e-06, "loss": 0.7297, "step": 16650 }, { "epoch": 0.76, "learning_rate": 4.874483635208135e-06, "loss": 0.6457, "step": 16660 }, { "epoch": 0.76, "learning_rate": 4.865404693812702e-06, "loss": 0.709, "step": 16670 }, { "epoch": 0.76, "learning_rate": 4.8563257524172686e-06, "loss": 0.6281, "step": 16680 }, { "epoch": 0.76, "learning_rate": 4.847246811021835e-06, "loss": 0.7352, "step": 16690 }, { "epoch": 0.76, "learning_rate": 4.838167869626402e-06, "loss": 0.6623, "step": 16700 }, { "epoch": 0.76, "learning_rate": 4.829088928230969e-06, "loss": 0.6873, "step": 16710 }, { "epoch": 0.76, "learning_rate": 4.820009986835535e-06, "loss": 0.6742, "step": 16720 }, { "epoch": 0.76, "learning_rate": 4.810931045440102e-06, "loss": 0.5553, "step": 16730 }, { "epoch": 0.76, "learning_rate": 4.801852104044669e-06, "loss": 0.652, "step": 16740 }, { "epoch": 0.76, "learning_rate": 4.7927731626492354e-06, "loss": 0.6018, "step": 16750 }, { "epoch": 0.76, "learning_rate": 4.783694221253802e-06, "loss": 0.6379, "step": 16760 }, { "epoch": 0.76, "learning_rate": 4.774615279858369e-06, "loss": 0.6402, "step": 16770 }, { "epoch": 0.76, "learning_rate": 4.7655363384629355e-06, "loss": 0.6189, "step": 16780 }, { "epoch": 0.76, "learning_rate": 4.756457397067502e-06, "loss": 0.7432, "step": 16790 }, { "epoch": 0.76, "learning_rate": 4.747378455672069e-06, "loss": 0.7102, "step": 16800 }, { "epoch": 0.76, "learning_rate": 4.738299514276636e-06, "loss": 0.6535, "step": 16810 }, { "epoch": 0.76, "learning_rate": 4.729220572881202e-06, "loss": 0.6627, "step": 16820 }, { "epoch": 0.76, "learning_rate": 4.720141631485769e-06, "loss": 0.5818, "step": 16830 }, { "epoch": 0.76, "learning_rate": 4.711062690090336e-06, "loss": 0.635, "step": 16840 }, { "epoch": 0.76, "learning_rate": 4.701983748694902e-06, "loss": 0.7068, "step": 16850 }, { "epoch": 0.77, "learning_rate": 4.692904807299469e-06, "loss": 0.6361, "step": 16860 }, { "epoch": 0.77, "learning_rate": 4.683825865904036e-06, "loss": 0.5547, "step": 16870 }, { "epoch": 0.77, "learning_rate": 4.6747469245086025e-06, "loss": 0.5598, "step": 16880 }, { "epoch": 0.77, "learning_rate": 4.665667983113169e-06, "loss": 0.6877, "step": 16890 }, { "epoch": 0.77, "learning_rate": 4.656589041717736e-06, "loss": 0.6459, "step": 16900 }, { "epoch": 0.77, "learning_rate": 4.647510100322303e-06, "loss": 0.575, "step": 16910 }, { "epoch": 0.77, "learning_rate": 4.638431158926869e-06, "loss": 0.6687, "step": 16920 }, { "epoch": 0.77, "learning_rate": 4.629352217531437e-06, "loss": 0.6676, "step": 16930 }, { "epoch": 0.77, "learning_rate": 4.620273276136003e-06, "loss": 0.7098, "step": 16940 }, { "epoch": 0.77, "learning_rate": 4.611194334740569e-06, "loss": 0.7119, "step": 16950 }, { "epoch": 0.77, "learning_rate": 4.602115393345136e-06, "loss": 0.5803, "step": 16960 }, { "epoch": 0.77, "learning_rate": 4.593036451949703e-06, "loss": 0.6615, "step": 16970 }, { "epoch": 0.77, "learning_rate": 4.58395751055427e-06, "loss": 0.6672, "step": 16980 }, { "epoch": 0.77, "learning_rate": 4.574878569158836e-06, "loss": 0.7334, "step": 16990 }, { "epoch": 0.77, "learning_rate": 4.565799627763403e-06, "loss": 0.7033, "step": 17000 }, { "epoch": 0.77, "eval_accuracy": 0.6286817962337036, "eval_loss": 0.6592499613761902, "eval_runtime": 72.898, "eval_samples_per_second": 56.819, "eval_steps_per_second": 14.212, "step": 17000 }, { "epoch": 0.77, "learning_rate": 4.5567206863679695e-06, "loss": 0.6502, "step": 17010 }, { "epoch": 0.77, "learning_rate": 4.547641744972536e-06, "loss": 0.7656, "step": 17020 }, { "epoch": 0.77, "learning_rate": 4.538562803577104e-06, "loss": 0.6209, "step": 17030 }, { "epoch": 0.77, "learning_rate": 4.52948386218167e-06, "loss": 0.624, "step": 17040 }, { "epoch": 0.77, "learning_rate": 4.520404920786236e-06, "loss": 0.6344, "step": 17050 }, { "epoch": 0.77, "learning_rate": 4.511325979390803e-06, "loss": 0.6299, "step": 17060 }, { "epoch": 0.77, "learning_rate": 4.5022470379953706e-06, "loss": 0.6219, "step": 17070 }, { "epoch": 0.78, "learning_rate": 4.493168096599937e-06, "loss": 0.6348, "step": 17080 }, { "epoch": 0.78, "learning_rate": 4.484089155204503e-06, "loss": 0.7537, "step": 17090 }, { "epoch": 0.78, "learning_rate": 4.47501021380907e-06, "loss": 0.5959, "step": 17100 }, { "epoch": 0.78, "learning_rate": 4.4659312724136365e-06, "loss": 0.7732, "step": 17110 }, { "epoch": 0.78, "learning_rate": 4.456852331018204e-06, "loss": 0.6707, "step": 17120 }, { "epoch": 0.78, "learning_rate": 4.447773389622771e-06, "loss": 0.6881, "step": 17130 }, { "epoch": 0.78, "learning_rate": 4.438694448227337e-06, "loss": 0.6984, "step": 17140 }, { "epoch": 0.78, "learning_rate": 4.429615506831903e-06, "loss": 0.5365, "step": 17150 }, { "epoch": 0.78, "learning_rate": 4.420536565436471e-06, "loss": 0.6693, "step": 17160 }, { "epoch": 0.78, "learning_rate": 4.4114576240410375e-06, "loss": 0.6051, "step": 17170 }, { "epoch": 0.78, "learning_rate": 4.402378682645604e-06, "loss": 0.6814, "step": 17180 }, { "epoch": 0.78, "learning_rate": 4.39329974125017e-06, "loss": 0.7059, "step": 17190 }, { "epoch": 0.78, "learning_rate": 4.384220799854737e-06, "loss": 0.6785, "step": 17200 }, { "epoch": 0.78, "learning_rate": 4.375141858459304e-06, "loss": 0.5293, "step": 17210 }, { "epoch": 0.78, "learning_rate": 4.366062917063871e-06, "loss": 0.7861, "step": 17220 }, { "epoch": 0.78, "learning_rate": 4.356983975668438e-06, "loss": 0.6998, "step": 17230 }, { "epoch": 0.78, "learning_rate": 4.3479050342730035e-06, "loss": 0.6299, "step": 17240 }, { "epoch": 0.78, "learning_rate": 4.338826092877571e-06, "loss": 0.5969, "step": 17250 }, { "epoch": 0.78, "learning_rate": 4.329747151482138e-06, "loss": 0.727, "step": 17260 }, { "epoch": 0.78, "learning_rate": 4.3206682100867045e-06, "loss": 0.5861, "step": 17270 }, { "epoch": 0.78, "learning_rate": 4.311589268691271e-06, "loss": 0.6994, "step": 17280 }, { "epoch": 0.78, "learning_rate": 4.302510327295837e-06, "loss": 0.6826, "step": 17290 }, { "epoch": 0.79, "learning_rate": 4.2934313859004046e-06, "loss": 0.5617, "step": 17300 }, { "epoch": 0.79, "learning_rate": 4.284352444504971e-06, "loss": 0.6279, "step": 17310 }, { "epoch": 0.79, "learning_rate": 4.275273503109538e-06, "loss": 0.5873, "step": 17320 }, { "epoch": 0.79, "learning_rate": 4.266194561714105e-06, "loss": 0.742, "step": 17330 }, { "epoch": 0.79, "learning_rate": 4.257115620318671e-06, "loss": 0.7314, "step": 17340 }, { "epoch": 0.79, "learning_rate": 4.248036678923238e-06, "loss": 0.551, "step": 17350 }, { "epoch": 0.79, "learning_rate": 4.238957737527805e-06, "loss": 0.7438, "step": 17360 }, { "epoch": 0.79, "learning_rate": 4.2298787961323714e-06, "loss": 0.6326, "step": 17370 }, { "epoch": 0.79, "learning_rate": 4.220799854736938e-06, "loss": 0.6129, "step": 17380 }, { "epoch": 0.79, "learning_rate": 4.211720913341505e-06, "loss": 0.802, "step": 17390 }, { "epoch": 0.79, "learning_rate": 4.2026419719460715e-06, "loss": 0.6312, "step": 17400 }, { "epoch": 0.79, "learning_rate": 4.193563030550638e-06, "loss": 0.6674, "step": 17410 }, { "epoch": 0.79, "learning_rate": 4.184484089155205e-06, "loss": 0.8195, "step": 17420 }, { "epoch": 0.79, "learning_rate": 4.175405147759772e-06, "loss": 0.6348, "step": 17430 }, { "epoch": 0.79, "learning_rate": 4.166326206364338e-06, "loss": 0.6832, "step": 17440 }, { "epoch": 0.79, "learning_rate": 4.157247264968905e-06, "loss": 0.5834, "step": 17450 }, { "epoch": 0.79, "learning_rate": 4.148168323573472e-06, "loss": 0.6127, "step": 17460 }, { "epoch": 0.79, "learning_rate": 4.139089382178038e-06, "loss": 0.6381, "step": 17470 }, { "epoch": 0.79, "learning_rate": 4.130010440782605e-06, "loss": 0.5922, "step": 17480 }, { "epoch": 0.79, "learning_rate": 4.120931499387172e-06, "loss": 0.6482, "step": 17490 }, { "epoch": 0.79, "learning_rate": 4.1118525579917385e-06, "loss": 0.6711, "step": 17500 }, { "epoch": 0.79, "eval_accuracy": 0.6301303718010622, "eval_loss": 0.6614124178886414, "eval_runtime": 72.5414, "eval_samples_per_second": 57.098, "eval_steps_per_second": 14.281, "step": 17500 }, { "epoch": 0.79, "learning_rate": 4.102773616596305e-06, "loss": 0.5473, "step": 17510 }, { "epoch": 0.8, "learning_rate": 4.093694675200872e-06, "loss": 0.6637, "step": 17520 }, { "epoch": 0.8, "learning_rate": 4.084615733805439e-06, "loss": 0.724, "step": 17530 }, { "epoch": 0.8, "learning_rate": 4.075536792410005e-06, "loss": 0.615, "step": 17540 }, { "epoch": 0.8, "learning_rate": 4.066457851014572e-06, "loss": 0.6775, "step": 17550 }, { "epoch": 0.8, "learning_rate": 4.057378909619139e-06, "loss": 0.6363, "step": 17560 }, { "epoch": 0.8, "learning_rate": 4.048299968223705e-06, "loss": 0.7193, "step": 17570 }, { "epoch": 0.8, "learning_rate": 4.039221026828272e-06, "loss": 0.6891, "step": 17580 }, { "epoch": 0.8, "learning_rate": 4.030142085432839e-06, "loss": 0.7309, "step": 17590 }, { "epoch": 0.8, "learning_rate": 4.0210631440374055e-06, "loss": 0.8832, "step": 17600 }, { "epoch": 0.8, "learning_rate": 4.011984202641972e-06, "loss": 0.651, "step": 17610 }, { "epoch": 0.8, "learning_rate": 4.002905261246539e-06, "loss": 0.7375, "step": 17620 }, { "epoch": 0.8, "learning_rate": 3.9938263198511055e-06, "loss": 0.785, "step": 17630 }, { "epoch": 0.8, "learning_rate": 3.984747378455672e-06, "loss": 0.7391, "step": 17640 }, { "epoch": 0.8, "learning_rate": 3.975668437060239e-06, "loss": 0.6408, "step": 17650 }, { "epoch": 0.8, "learning_rate": 3.966589495664806e-06, "loss": 0.5838, "step": 17660 }, { "epoch": 0.8, "learning_rate": 3.957510554269372e-06, "loss": 0.5918, "step": 17670 }, { "epoch": 0.8, "learning_rate": 3.948431612873939e-06, "loss": 0.5525, "step": 17680 }, { "epoch": 0.8, "learning_rate": 3.939352671478506e-06, "loss": 0.7316, "step": 17690 }, { "epoch": 0.8, "learning_rate": 3.930273730083073e-06, "loss": 0.7426, "step": 17700 }, { "epoch": 0.8, "learning_rate": 3.921194788687639e-06, "loss": 0.6811, "step": 17710 }, { "epoch": 0.8, "learning_rate": 3.912115847292206e-06, "loss": 0.6797, "step": 17720 }, { "epoch": 0.8, "learning_rate": 3.9030369058967725e-06, "loss": 0.5119, "step": 17730 }, { "epoch": 0.81, "learning_rate": 3.893957964501339e-06, "loss": 0.6432, "step": 17740 }, { "epoch": 0.81, "learning_rate": 3.884879023105907e-06, "loss": 0.6939, "step": 17750 }, { "epoch": 0.81, "learning_rate": 3.875800081710473e-06, "loss": 0.6508, "step": 17760 }, { "epoch": 0.81, "learning_rate": 3.866721140315039e-06, "loss": 0.5836, "step": 17770 }, { "epoch": 0.81, "learning_rate": 3.857642198919606e-06, "loss": 0.6607, "step": 17780 }, { "epoch": 0.81, "learning_rate": 3.8485632575241735e-06, "loss": 0.7164, "step": 17790 }, { "epoch": 0.81, "learning_rate": 3.83948431612874e-06, "loss": 0.6176, "step": 17800 }, { "epoch": 0.81, "learning_rate": 3.830405374733306e-06, "loss": 0.6244, "step": 17810 }, { "epoch": 0.81, "learning_rate": 3.821326433337873e-06, "loss": 0.6908, "step": 17820 }, { "epoch": 0.81, "learning_rate": 3.8122474919424395e-06, "loss": 0.7539, "step": 17830 }, { "epoch": 0.81, "learning_rate": 3.8031685505470066e-06, "loss": 0.7873, "step": 17840 }, { "epoch": 0.81, "learning_rate": 3.7940896091515733e-06, "loss": 0.6102, "step": 17850 }, { "epoch": 0.81, "learning_rate": 3.78501066775614e-06, "loss": 0.5908, "step": 17860 }, { "epoch": 0.81, "learning_rate": 3.7759317263607067e-06, "loss": 0.5654, "step": 17870 }, { "epoch": 0.81, "learning_rate": 3.766852784965274e-06, "loss": 0.7463, "step": 17880 }, { "epoch": 0.81, "learning_rate": 3.75777384356984e-06, "loss": 0.6754, "step": 17890 }, { "epoch": 0.81, "learning_rate": 3.7486949021744068e-06, "loss": 0.7715, "step": 17900 }, { "epoch": 0.81, "learning_rate": 3.7396159607789735e-06, "loss": 0.6137, "step": 17910 }, { "epoch": 0.81, "learning_rate": 3.73053701938354e-06, "loss": 0.593, "step": 17920 }, { "epoch": 0.81, "learning_rate": 3.7214580779881073e-06, "loss": 0.6748, "step": 17930 }, { "epoch": 0.81, "learning_rate": 3.7123791365926735e-06, "loss": 0.618, "step": 17940 }, { "epoch": 0.81, "learning_rate": 3.7033001951972402e-06, "loss": 0.5645, "step": 17950 }, { "epoch": 0.82, "learning_rate": 3.694221253801807e-06, "loss": 0.684, "step": 17960 }, { "epoch": 0.82, "learning_rate": 3.685142312406374e-06, "loss": 0.634, "step": 17970 }, { "epoch": 0.82, "learning_rate": 3.6760633710109408e-06, "loss": 0.6729, "step": 17980 }, { "epoch": 0.82, "learning_rate": 3.666984429615507e-06, "loss": 0.7057, "step": 17990 }, { "epoch": 0.82, "learning_rate": 3.6579054882200737e-06, "loss": 0.6484, "step": 18000 }, { "epoch": 0.82, "eval_accuracy": 0.6308546595847416, "eval_loss": 0.659805417060852, "eval_runtime": 73.2829, "eval_samples_per_second": 56.521, "eval_steps_per_second": 14.137, "step": 18000 }, { "epoch": 0.82, "learning_rate": 3.6488265468246404e-06, "loss": 0.6398, "step": 18010 }, { "epoch": 0.82, "learning_rate": 3.6397476054292075e-06, "loss": 0.5816, "step": 18020 }, { "epoch": 0.82, "learning_rate": 3.6306686640337742e-06, "loss": 0.6373, "step": 18030 }, { "epoch": 0.82, "learning_rate": 3.6215897226383405e-06, "loss": 0.6348, "step": 18040 }, { "epoch": 0.82, "learning_rate": 3.612510781242907e-06, "loss": 0.6752, "step": 18050 }, { "epoch": 0.82, "learning_rate": 3.6034318398474743e-06, "loss": 0.7084, "step": 18060 }, { "epoch": 0.82, "learning_rate": 3.594352898452041e-06, "loss": 0.5984, "step": 18070 }, { "epoch": 0.82, "learning_rate": 3.5852739570566077e-06, "loss": 0.5811, "step": 18080 }, { "epoch": 0.82, "learning_rate": 3.576195015661174e-06, "loss": 0.7691, "step": 18090 }, { "epoch": 0.82, "learning_rate": 3.5671160742657407e-06, "loss": 0.6145, "step": 18100 }, { "epoch": 0.82, "learning_rate": 3.558037132870308e-06, "loss": 0.6488, "step": 18110 }, { "epoch": 0.82, "learning_rate": 3.5489581914748745e-06, "loss": 0.6141, "step": 18120 }, { "epoch": 0.82, "learning_rate": 3.539879250079441e-06, "loss": 0.8035, "step": 18130 }, { "epoch": 0.82, "learning_rate": 3.5308003086840075e-06, "loss": 0.6059, "step": 18140 }, { "epoch": 0.82, "learning_rate": 3.521721367288574e-06, "loss": 0.6791, "step": 18150 }, { "epoch": 0.82, "learning_rate": 3.5126424258931413e-06, "loss": 0.5494, "step": 18160 }, { "epoch": 0.82, "learning_rate": 3.503563484497708e-06, "loss": 0.6541, "step": 18170 }, { "epoch": 0.83, "learning_rate": 3.4944845431022747e-06, "loss": 0.6811, "step": 18180 }, { "epoch": 0.83, "learning_rate": 3.485405601706841e-06, "loss": 0.6953, "step": 18190 }, { "epoch": 0.83, "learning_rate": 3.476326660311408e-06, "loss": 0.7562, "step": 18200 }, { "epoch": 0.83, "learning_rate": 3.4672477189159748e-06, "loss": 0.649, "step": 18210 }, { "epoch": 0.83, "learning_rate": 3.4581687775205415e-06, "loss": 0.692, "step": 18220 }, { "epoch": 0.83, "learning_rate": 3.449089836125108e-06, "loss": 0.633, "step": 18230 }, { "epoch": 0.83, "learning_rate": 3.4400108947296744e-06, "loss": 0.6006, "step": 18240 }, { "epoch": 0.83, "learning_rate": 3.4309319533342416e-06, "loss": 0.526, "step": 18250 }, { "epoch": 0.83, "learning_rate": 3.4218530119388082e-06, "loss": 0.6, "step": 18260 }, { "epoch": 0.83, "learning_rate": 3.412774070543375e-06, "loss": 0.7064, "step": 18270 }, { "epoch": 0.83, "learning_rate": 3.4036951291479412e-06, "loss": 0.7047, "step": 18280 }, { "epoch": 0.83, "learning_rate": 3.3946161877525088e-06, "loss": 0.7266, "step": 18290 }, { "epoch": 0.83, "learning_rate": 3.385537246357075e-06, "loss": 0.6955, "step": 18300 }, { "epoch": 0.83, "learning_rate": 3.3764583049616417e-06, "loss": 0.5686, "step": 18310 }, { "epoch": 0.83, "learning_rate": 3.3673793635662084e-06, "loss": 0.5816, "step": 18320 }, { "epoch": 0.83, "learning_rate": 3.3583004221707747e-06, "loss": 0.6555, "step": 18330 }, { "epoch": 0.83, "learning_rate": 3.3492214807753422e-06, "loss": 0.5975, "step": 18340 }, { "epoch": 0.83, "learning_rate": 3.3401425393799085e-06, "loss": 0.6977, "step": 18350 }, { "epoch": 0.83, "learning_rate": 3.331063597984475e-06, "loss": 0.6643, "step": 18360 }, { "epoch": 0.83, "learning_rate": 3.321984656589042e-06, "loss": 0.7504, "step": 18370 }, { "epoch": 0.83, "learning_rate": 3.312905715193609e-06, "loss": 0.6539, "step": 18380 }, { "epoch": 0.83, "learning_rate": 3.3038267737981757e-06, "loss": 0.7016, "step": 18390 }, { "epoch": 0.84, "learning_rate": 3.294747832402742e-06, "loss": 0.5623, "step": 18400 }, { "epoch": 0.84, "learning_rate": 3.2856688910073087e-06, "loss": 0.5807, "step": 18410 }, { "epoch": 0.84, "learning_rate": 3.2765899496118754e-06, "loss": 0.701, "step": 18420 }, { "epoch": 0.84, "learning_rate": 3.2675110082164425e-06, "loss": 0.6535, "step": 18430 }, { "epoch": 0.84, "learning_rate": 3.2584320668210088e-06, "loss": 0.6252, "step": 18440 }, { "epoch": 0.84, "learning_rate": 3.2493531254255755e-06, "loss": 0.576, "step": 18450 }, { "epoch": 0.84, "learning_rate": 3.240274184030142e-06, "loss": 0.7225, "step": 18460 }, { "epoch": 0.84, "learning_rate": 3.2311952426347093e-06, "loss": 0.574, "step": 18470 }, { "epoch": 0.84, "learning_rate": 3.222116301239276e-06, "loss": 0.6803, "step": 18480 }, { "epoch": 0.84, "learning_rate": 3.2130373598438423e-06, "loss": 0.6945, "step": 18490 }, { "epoch": 0.84, "learning_rate": 3.203958418448409e-06, "loss": 0.5729, "step": 18500 }, { "epoch": 0.84, "eval_accuracy": 0.6269917914051183, "eval_loss": 0.6618302464485168, "eval_runtime": 74.0627, "eval_samples_per_second": 55.926, "eval_steps_per_second": 13.988, "step": 18500 }, { "epoch": 0.84, "learning_rate": 3.1948794770529757e-06, "loss": 0.6121, "step": 18510 }, { "epoch": 0.84, "learning_rate": 3.1858005356575428e-06, "loss": 0.632, "step": 18520 }, { "epoch": 0.84, "learning_rate": 3.1767215942621095e-06, "loss": 0.5984, "step": 18530 }, { "epoch": 0.84, "learning_rate": 3.1676426528666757e-06, "loss": 0.8053, "step": 18540 }, { "epoch": 0.84, "learning_rate": 3.1585637114712424e-06, "loss": 0.7311, "step": 18550 }, { "epoch": 0.84, "learning_rate": 3.1494847700758096e-06, "loss": 0.6496, "step": 18560 }, { "epoch": 0.84, "learning_rate": 3.1404058286803763e-06, "loss": 0.7105, "step": 18570 }, { "epoch": 0.84, "learning_rate": 3.131326887284943e-06, "loss": 0.6768, "step": 18580 }, { "epoch": 0.84, "learning_rate": 3.1222479458895092e-06, "loss": 0.7305, "step": 18590 }, { "epoch": 0.84, "learning_rate": 3.113169004494076e-06, "loss": 0.8035, "step": 18600 }, { "epoch": 0.84, "learning_rate": 3.104090063098643e-06, "loss": 0.6086, "step": 18610 }, { "epoch": 0.85, "learning_rate": 3.0950111217032097e-06, "loss": 0.6119, "step": 18620 }, { "epoch": 0.85, "learning_rate": 3.0859321803077764e-06, "loss": 0.7191, "step": 18630 }, { "epoch": 0.85, "learning_rate": 3.0768532389123427e-06, "loss": 0.648, "step": 18640 }, { "epoch": 0.85, "learning_rate": 3.06777429751691e-06, "loss": 0.7826, "step": 18650 }, { "epoch": 0.85, "learning_rate": 3.0586953561214765e-06, "loss": 0.7137, "step": 18660 }, { "epoch": 0.85, "learning_rate": 3.0496164147260432e-06, "loss": 0.8223, "step": 18670 }, { "epoch": 0.85, "learning_rate": 3.04053747333061e-06, "loss": 0.7203, "step": 18680 }, { "epoch": 0.85, "learning_rate": 3.031458531935176e-06, "loss": 0.6809, "step": 18690 }, { "epoch": 0.85, "learning_rate": 3.0223795905397433e-06, "loss": 0.6203, "step": 18700 }, { "epoch": 0.85, "learning_rate": 3.01330064914431e-06, "loss": 0.6621, "step": 18710 }, { "epoch": 0.85, "learning_rate": 3.0042217077488767e-06, "loss": 0.6363, "step": 18720 }, { "epoch": 0.85, "learning_rate": 2.9951427663534434e-06, "loss": 0.6408, "step": 18730 }, { "epoch": 0.85, "learning_rate": 2.9860638249580105e-06, "loss": 0.5842, "step": 18740 }, { "epoch": 0.85, "learning_rate": 2.9769848835625768e-06, "loss": 0.7553, "step": 18750 }, { "epoch": 0.85, "learning_rate": 2.9679059421671435e-06, "loss": 0.7262, "step": 18760 }, { "epoch": 0.85, "learning_rate": 2.95882700077171e-06, "loss": 0.6549, "step": 18770 }, { "epoch": 0.85, "learning_rate": 2.949748059376277e-06, "loss": 0.7242, "step": 18780 }, { "epoch": 0.85, "learning_rate": 2.940669117980844e-06, "loss": 0.625, "step": 18790 }, { "epoch": 0.85, "learning_rate": 2.9315901765854103e-06, "loss": 0.657, "step": 18800 }, { "epoch": 0.85, "learning_rate": 2.922511235189977e-06, "loss": 0.7918, "step": 18810 }, { "epoch": 0.85, "learning_rate": 2.9134322937945437e-06, "loss": 0.6266, "step": 18820 }, { "epoch": 0.85, "learning_rate": 2.9043533523991108e-06, "loss": 0.6428, "step": 18830 }, { "epoch": 0.86, "learning_rate": 2.8952744110036775e-06, "loss": 0.7258, "step": 18840 }, { "epoch": 0.86, "learning_rate": 2.8861954696082437e-06, "loss": 0.6338, "step": 18850 }, { "epoch": 0.86, "learning_rate": 2.8771165282128104e-06, "loss": 0.5566, "step": 18860 }, { "epoch": 0.86, "learning_rate": 2.868037586817377e-06, "loss": 0.6768, "step": 18870 }, { "epoch": 0.86, "learning_rate": 2.8589586454219443e-06, "loss": 0.7463, "step": 18880 }, { "epoch": 0.86, "learning_rate": 2.849879704026511e-06, "loss": 0.7348, "step": 18890 }, { "epoch": 0.86, "learning_rate": 2.8408007626310772e-06, "loss": 0.741, "step": 18900 }, { "epoch": 0.86, "learning_rate": 2.831721821235644e-06, "loss": 0.7045, "step": 18910 }, { "epoch": 0.86, "learning_rate": 2.822642879840211e-06, "loss": 0.6104, "step": 18920 }, { "epoch": 0.86, "learning_rate": 2.8135639384447777e-06, "loss": 0.5941, "step": 18930 }, { "epoch": 0.86, "learning_rate": 2.8044849970493444e-06, "loss": 0.7625, "step": 18940 }, { "epoch": 0.86, "learning_rate": 2.7954060556539107e-06, "loss": 0.6734, "step": 18950 }, { "epoch": 0.86, "learning_rate": 2.7863271142584774e-06, "loss": 0.7344, "step": 18960 }, { "epoch": 0.86, "learning_rate": 2.7772481728630445e-06, "loss": 0.6715, "step": 18970 }, { "epoch": 0.86, "learning_rate": 2.7681692314676112e-06, "loss": 0.7699, "step": 18980 }, { "epoch": 0.86, "learning_rate": 2.759090290072178e-06, "loss": 0.6113, "step": 18990 }, { "epoch": 0.86, "learning_rate": 2.750011348676744e-06, "loss": 0.634, "step": 19000 }, { "epoch": 0.86, "eval_accuracy": 0.629406084017383, "eval_loss": 0.6582936644554138, "eval_runtime": 74.1488, "eval_samples_per_second": 55.861, "eval_steps_per_second": 13.972, "step": 19000 }, { "epoch": 0.86, "learning_rate": 2.7409324072813113e-06, "loss": 0.5986, "step": 19010 }, { "epoch": 0.86, "learning_rate": 2.731853465885878e-06, "loss": 0.7604, "step": 19020 }, { "epoch": 0.86, "learning_rate": 2.7227745244904447e-06, "loss": 0.6496, "step": 19030 }, { "epoch": 0.86, "learning_rate": 2.7136955830950114e-06, "loss": 0.6102, "step": 19040 }, { "epoch": 0.86, "learning_rate": 2.7046166416995777e-06, "loss": 0.7336, "step": 19050 }, { "epoch": 0.87, "learning_rate": 2.6955377003041448e-06, "loss": 0.7105, "step": 19060 }, { "epoch": 0.87, "learning_rate": 2.6864587589087115e-06, "loss": 0.6516, "step": 19070 }, { "epoch": 0.87, "learning_rate": 2.677379817513278e-06, "loss": 0.6199, "step": 19080 }, { "epoch": 0.87, "learning_rate": 2.668300876117845e-06, "loss": 0.6559, "step": 19090 }, { "epoch": 0.87, "learning_rate": 2.659221934722412e-06, "loss": 0.6232, "step": 19100 }, { "epoch": 0.87, "learning_rate": 2.6501429933269783e-06, "loss": 0.634, "step": 19110 }, { "epoch": 0.87, "learning_rate": 2.641064051931545e-06, "loss": 0.6158, "step": 19120 }, { "epoch": 0.87, "learning_rate": 2.6319851105361117e-06, "loss": 0.685, "step": 19130 }, { "epoch": 0.87, "learning_rate": 2.6229061691406784e-06, "loss": 0.6271, "step": 19140 }, { "epoch": 0.87, "learning_rate": 2.6138272277452455e-06, "loss": 0.6379, "step": 19150 }, { "epoch": 0.87, "learning_rate": 2.6047482863498117e-06, "loss": 0.6584, "step": 19160 }, { "epoch": 0.87, "learning_rate": 2.5956693449543784e-06, "loss": 0.6168, "step": 19170 }, { "epoch": 0.87, "learning_rate": 2.586590403558945e-06, "loss": 0.5928, "step": 19180 }, { "epoch": 0.87, "learning_rate": 2.577511462163512e-06, "loss": 0.5943, "step": 19190 }, { "epoch": 0.87, "learning_rate": 2.568432520768079e-06, "loss": 0.7664, "step": 19200 }, { "epoch": 0.87, "learning_rate": 2.5593535793726452e-06, "loss": 0.698, "step": 19210 }, { "epoch": 0.87, "learning_rate": 2.550274637977212e-06, "loss": 0.6602, "step": 19220 }, { "epoch": 0.87, "learning_rate": 2.5411956965817786e-06, "loss": 0.6725, "step": 19230 }, { "epoch": 0.87, "learning_rate": 2.5321167551863457e-06, "loss": 0.6062, "step": 19240 }, { "epoch": 0.87, "learning_rate": 2.5230378137909124e-06, "loss": 0.6545, "step": 19250 }, { "epoch": 0.87, "learning_rate": 2.5139588723954787e-06, "loss": 0.5836, "step": 19260 }, { "epoch": 0.87, "learning_rate": 2.5048799310000454e-06, "loss": 0.6107, "step": 19270 }, { "epoch": 0.88, "learning_rate": 2.495800989604612e-06, "loss": 0.6293, "step": 19280 }, { "epoch": 0.88, "learning_rate": 2.486722048209179e-06, "loss": 0.6992, "step": 19290 }, { "epoch": 0.88, "learning_rate": 2.477643106813746e-06, "loss": 0.5889, "step": 19300 }, { "epoch": 0.88, "learning_rate": 2.468564165418312e-06, "loss": 0.6504, "step": 19310 }, { "epoch": 0.88, "learning_rate": 2.4594852240228793e-06, "loss": 0.6891, "step": 19320 }, { "epoch": 0.88, "learning_rate": 2.4504062826274456e-06, "loss": 0.6273, "step": 19330 }, { "epoch": 0.88, "learning_rate": 2.4413273412320127e-06, "loss": 0.6385, "step": 19340 }, { "epoch": 0.88, "learning_rate": 2.4322483998365794e-06, "loss": 0.6826, "step": 19350 }, { "epoch": 0.88, "learning_rate": 2.423169458441146e-06, "loss": 0.6477, "step": 19360 }, { "epoch": 0.88, "learning_rate": 2.414090517045713e-06, "loss": 0.6295, "step": 19370 }, { "epoch": 0.88, "learning_rate": 2.405011575650279e-06, "loss": 0.6473, "step": 19380 }, { "epoch": 0.88, "learning_rate": 2.395932634254846e-06, "loss": 0.7258, "step": 19390 }, { "epoch": 0.88, "learning_rate": 2.386853692859413e-06, "loss": 0.7238, "step": 19400 }, { "epoch": 0.88, "learning_rate": 2.3777747514639796e-06, "loss": 0.602, "step": 19410 }, { "epoch": 0.88, "learning_rate": 2.3686958100685463e-06, "loss": 0.6506, "step": 19420 }, { "epoch": 0.88, "learning_rate": 2.359616868673113e-06, "loss": 0.657, "step": 19430 }, { "epoch": 0.88, "learning_rate": 2.3505379272776797e-06, "loss": 0.5479, "step": 19440 }, { "epoch": 0.88, "learning_rate": 2.3414589858822464e-06, "loss": 0.7652, "step": 19450 }, { "epoch": 0.88, "learning_rate": 2.332380044486813e-06, "loss": 0.6119, "step": 19460 }, { "epoch": 0.88, "learning_rate": 2.3233011030913798e-06, "loss": 0.625, "step": 19470 }, { "epoch": 0.88, "learning_rate": 2.3142221616959464e-06, "loss": 0.634, "step": 19480 }, { "epoch": 0.88, "learning_rate": 2.305143220300513e-06, "loss": 0.6096, "step": 19490 }, { "epoch": 0.89, "learning_rate": 2.29606427890508e-06, "loss": 0.6139, "step": 19500 }, { "epoch": 0.89, "eval_accuracy": 0.6296475132786093, "eval_loss": 0.6594942212104797, "eval_runtime": 73.3398, "eval_samples_per_second": 56.477, "eval_steps_per_second": 14.126, "step": 19500 }, { "epoch": 0.89, "learning_rate": 2.2869853375096465e-06, "loss": 0.6381, "step": 19510 }, { "epoch": 0.89, "learning_rate": 2.2779063961142132e-06, "loss": 0.6336, "step": 19520 }, { "epoch": 0.89, "learning_rate": 2.26882745471878e-06, "loss": 0.7096, "step": 19530 }, { "epoch": 0.89, "learning_rate": 2.2597485133233466e-06, "loss": 0.6396, "step": 19540 }, { "epoch": 0.89, "learning_rate": 2.2506695719279133e-06, "loss": 0.6719, "step": 19550 }, { "epoch": 0.89, "learning_rate": 2.24159063053248e-06, "loss": 0.6197, "step": 19560 }, { "epoch": 0.89, "learning_rate": 2.2325116891370467e-06, "loss": 0.6291, "step": 19570 }, { "epoch": 0.89, "learning_rate": 2.2234327477416134e-06, "loss": 0.6397, "step": 19580 }, { "epoch": 0.89, "learning_rate": 2.21435380634618e-06, "loss": 0.7871, "step": 19590 }, { "epoch": 0.89, "learning_rate": 2.205274864950747e-06, "loss": 0.7424, "step": 19600 }, { "epoch": 0.89, "learning_rate": 2.196195923555314e-06, "loss": 0.7363, "step": 19610 }, { "epoch": 0.89, "learning_rate": 2.18711698215988e-06, "loss": 0.7027, "step": 19620 }, { "epoch": 0.89, "learning_rate": 2.178038040764447e-06, "loss": 0.627, "step": 19630 }, { "epoch": 0.89, "learning_rate": 2.1689590993690136e-06, "loss": 0.7043, "step": 19640 }, { "epoch": 0.89, "learning_rate": 2.1598801579735803e-06, "loss": 0.5365, "step": 19650 }, { "epoch": 0.89, "learning_rate": 2.1508012165781474e-06, "loss": 0.607, "step": 19660 }, { "epoch": 0.89, "learning_rate": 2.1417222751827137e-06, "loss": 0.6062, "step": 19670 }, { "epoch": 0.89, "learning_rate": 2.132643333787281e-06, "loss": 0.7021, "step": 19680 }, { "epoch": 0.89, "learning_rate": 2.123564392391847e-06, "loss": 0.5836, "step": 19690 }, { "epoch": 0.89, "learning_rate": 2.114485450996414e-06, "loss": 0.6492, "step": 19700 }, { "epoch": 0.89, "learning_rate": 2.105406509600981e-06, "loss": 0.5834, "step": 19710 }, { "epoch": 0.9, "learning_rate": 2.096327568205547e-06, "loss": 0.5445, "step": 19720 }, { "epoch": 0.9, "learning_rate": 2.0872486268101143e-06, "loss": 0.5725, "step": 19730 }, { "epoch": 0.9, "learning_rate": 2.0781696854146805e-06, "loss": 0.6119, "step": 19740 }, { "epoch": 0.9, "learning_rate": 2.0690907440192477e-06, "loss": 0.5551, "step": 19750 }, { "epoch": 0.9, "learning_rate": 2.0600118026238144e-06, "loss": 0.6842, "step": 19760 }, { "epoch": 0.9, "learning_rate": 2.050932861228381e-06, "loss": 0.6477, "step": 19770 }, { "epoch": 0.9, "learning_rate": 2.0418539198329478e-06, "loss": 0.6268, "step": 19780 }, { "epoch": 0.9, "learning_rate": 2.0327749784375144e-06, "loss": 0.6814, "step": 19790 }, { "epoch": 0.9, "learning_rate": 2.023696037042081e-06, "loss": 0.6207, "step": 19800 }, { "epoch": 0.9, "learning_rate": 2.014617095646648e-06, "loss": 0.5721, "step": 19810 }, { "epoch": 0.9, "learning_rate": 2.0055381542512145e-06, "loss": 0.6545, "step": 19820 }, { "epoch": 0.9, "learning_rate": 1.9964592128557812e-06, "loss": 0.7377, "step": 19830 }, { "epoch": 0.9, "learning_rate": 1.987380271460348e-06, "loss": 0.6426, "step": 19840 }, { "epoch": 0.9, "learning_rate": 1.9783013300649146e-06, "loss": 0.6916, "step": 19850 }, { "epoch": 0.9, "learning_rate": 1.9692223886694813e-06, "loss": 0.609, "step": 19860 }, { "epoch": 0.9, "learning_rate": 1.960143447274048e-06, "loss": 0.7535, "step": 19870 }, { "epoch": 0.9, "learning_rate": 1.9510645058786147e-06, "loss": 0.6238, "step": 19880 }, { "epoch": 0.9, "learning_rate": 1.9419855644831814e-06, "loss": 0.7176, "step": 19890 }, { "epoch": 0.9, "learning_rate": 1.932906623087748e-06, "loss": 0.6641, "step": 19900 }, { "epoch": 0.9, "learning_rate": 1.923827681692315e-06, "loss": 0.5863, "step": 19910 }, { "epoch": 0.9, "learning_rate": 1.9147487402968815e-06, "loss": 0.6531, "step": 19920 }, { "epoch": 0.9, "learning_rate": 1.9056697989014484e-06, "loss": 0.6604, "step": 19930 }, { "epoch": 0.91, "learning_rate": 1.8965908575060149e-06, "loss": 0.6605, "step": 19940 }, { "epoch": 0.91, "learning_rate": 1.8875119161105818e-06, "loss": 0.7225, "step": 19950 }, { "epoch": 0.91, "learning_rate": 1.8784329747151483e-06, "loss": 0.7346, "step": 19960 }, { "epoch": 0.91, "learning_rate": 1.8693540333197152e-06, "loss": 0.6707, "step": 19970 }, { "epoch": 0.91, "learning_rate": 1.8602750919242819e-06, "loss": 0.6119, "step": 19980 }, { "epoch": 0.91, "learning_rate": 1.8511961505288484e-06, "loss": 0.6988, "step": 19990 }, { "epoch": 0.91, "learning_rate": 1.8421172091334153e-06, "loss": 0.6289, "step": 20000 }, { "epoch": 0.91, "eval_accuracy": 0.6286817962337036, "eval_loss": 0.6617236733436584, "eval_runtime": 74.5822, "eval_samples_per_second": 55.536, "eval_steps_per_second": 13.891, "step": 20000 }, { "epoch": 0.91, "learning_rate": 1.8330382677379818e-06, "loss": 0.6711, "step": 20010 }, { "epoch": 0.91, "learning_rate": 1.8239593263425487e-06, "loss": 0.5879, "step": 20020 }, { "epoch": 0.91, "learning_rate": 1.8148803849471154e-06, "loss": 0.7063, "step": 20030 }, { "epoch": 0.91, "learning_rate": 1.805801443551682e-06, "loss": 0.5883, "step": 20040 }, { "epoch": 0.91, "learning_rate": 1.7967225021562488e-06, "loss": 0.5684, "step": 20050 }, { "epoch": 0.91, "learning_rate": 1.7876435607608152e-06, "loss": 0.6596, "step": 20060 }, { "epoch": 0.91, "learning_rate": 1.7785646193653822e-06, "loss": 0.5432, "step": 20070 }, { "epoch": 0.91, "learning_rate": 1.7694856779699488e-06, "loss": 0.5748, "step": 20080 }, { "epoch": 0.91, "learning_rate": 1.7604067365745155e-06, "loss": 0.7727, "step": 20090 }, { "epoch": 0.91, "learning_rate": 1.7513277951790822e-06, "loss": 0.5312, "step": 20100 }, { "epoch": 0.91, "learning_rate": 1.7422488537836491e-06, "loss": 0.717, "step": 20110 }, { "epoch": 0.91, "learning_rate": 1.7331699123882156e-06, "loss": 0.6273, "step": 20120 }, { "epoch": 0.91, "learning_rate": 1.7240909709927825e-06, "loss": 0.627, "step": 20130 }, { "epoch": 0.91, "learning_rate": 1.715012029597349e-06, "loss": 0.8084, "step": 20140 }, { "epoch": 0.91, "learning_rate": 1.7059330882019157e-06, "loss": 0.557, "step": 20150 }, { "epoch": 0.92, "learning_rate": 1.6968541468064826e-06, "loss": 0.75, "step": 20160 }, { "epoch": 0.92, "learning_rate": 1.6877752054110491e-06, "loss": 0.7277, "step": 20170 }, { "epoch": 0.92, "learning_rate": 1.678696264015616e-06, "loss": 0.6887, "step": 20180 }, { "epoch": 0.92, "learning_rate": 1.6696173226201825e-06, "loss": 0.5959, "step": 20190 }, { "epoch": 0.92, "learning_rate": 1.6605383812247494e-06, "loss": 0.6359, "step": 20200 }, { "epoch": 0.92, "learning_rate": 1.651459439829316e-06, "loss": 0.5998, "step": 20210 }, { "epoch": 0.92, "learning_rate": 1.6423804984338828e-06, "loss": 0.585, "step": 20220 }, { "epoch": 0.92, "learning_rate": 1.6333015570384495e-06, "loss": 0.6037, "step": 20230 }, { "epoch": 0.92, "learning_rate": 1.624222615643016e-06, "loss": 0.7137, "step": 20240 }, { "epoch": 0.92, "learning_rate": 1.615143674247583e-06, "loss": 0.7148, "step": 20250 }, { "epoch": 0.92, "learning_rate": 1.6060647328521494e-06, "loss": 0.6832, "step": 20260 }, { "epoch": 0.92, "learning_rate": 1.5969857914567163e-06, "loss": 0.6246, "step": 20270 }, { "epoch": 0.92, "learning_rate": 1.587906850061283e-06, "loss": 0.6691, "step": 20280 }, { "epoch": 0.92, "learning_rate": 1.5788279086658497e-06, "loss": 0.6387, "step": 20290 }, { "epoch": 0.92, "learning_rate": 1.5697489672704164e-06, "loss": 0.6922, "step": 20300 }, { "epoch": 0.92, "learning_rate": 1.5606700258749833e-06, "loss": 0.6711, "step": 20310 }, { "epoch": 0.92, "learning_rate": 1.5515910844795498e-06, "loss": 0.693, "step": 20320 }, { "epoch": 0.92, "learning_rate": 1.5425121430841165e-06, "loss": 0.618, "step": 20330 }, { "epoch": 0.92, "learning_rate": 1.5334332016886832e-06, "loss": 0.6441, "step": 20340 }, { "epoch": 0.92, "learning_rate": 1.5243542602932499e-06, "loss": 0.6623, "step": 20350 }, { "epoch": 0.92, "learning_rate": 1.5152753188978168e-06, "loss": 0.6813, "step": 20360 }, { "epoch": 0.92, "learning_rate": 1.5061963775023832e-06, "loss": 0.6551, "step": 20370 }, { "epoch": 0.93, "learning_rate": 1.4971174361069502e-06, "loss": 0.6084, "step": 20380 }, { "epoch": 0.93, "learning_rate": 1.4880384947115166e-06, "loss": 0.6887, "step": 20390 }, { "epoch": 0.93, "learning_rate": 1.4789595533160835e-06, "loss": 0.7918, "step": 20400 }, { "epoch": 0.93, "learning_rate": 1.4698806119206502e-06, "loss": 0.626, "step": 20410 }, { "epoch": 0.93, "learning_rate": 1.4608016705252167e-06, "loss": 0.6773, "step": 20420 }, { "epoch": 0.93, "learning_rate": 1.4517227291297836e-06, "loss": 0.6996, "step": 20430 }, { "epoch": 0.93, "learning_rate": 1.4426437877343501e-06, "loss": 0.6697, "step": 20440 }, { "epoch": 0.93, "learning_rate": 1.433564846338917e-06, "loss": 0.5686, "step": 20450 }, { "epoch": 0.93, "learning_rate": 1.4244859049434837e-06, "loss": 0.6312, "step": 20460 }, { "epoch": 0.93, "learning_rate": 1.4154069635480504e-06, "loss": 0.6873, "step": 20470 }, { "epoch": 0.93, "learning_rate": 1.4063280221526171e-06, "loss": 0.7268, "step": 20480 }, { "epoch": 0.93, "learning_rate": 1.397249080757184e-06, "loss": 0.6863, "step": 20490 }, { "epoch": 0.93, "learning_rate": 1.3881701393617505e-06, "loss": 0.5889, "step": 20500 }, { "epoch": 0.93, "eval_accuracy": 0.6301303718010622, "eval_loss": 0.6595224738121033, "eval_runtime": 73.6905, "eval_samples_per_second": 56.208, "eval_steps_per_second": 14.059, "step": 20500 }, { "epoch": 0.93, "learning_rate": 1.3790911979663172e-06, "loss": 0.5518, "step": 20510 }, { "epoch": 0.93, "learning_rate": 1.370012256570884e-06, "loss": 0.7367, "step": 20520 }, { "epoch": 0.93, "learning_rate": 1.3609333151754506e-06, "loss": 0.5818, "step": 20530 }, { "epoch": 0.93, "learning_rate": 1.3518543737800175e-06, "loss": 0.7182, "step": 20540 }, { "epoch": 0.93, "learning_rate": 1.342775432384584e-06, "loss": 0.5711, "step": 20550 }, { "epoch": 0.93, "learning_rate": 1.333696490989151e-06, "loss": 0.6314, "step": 20560 }, { "epoch": 0.93, "learning_rate": 1.3246175495937174e-06, "loss": 0.6828, "step": 20570 }, { "epoch": 0.93, "learning_rate": 1.315538608198284e-06, "loss": 0.6562, "step": 20580 }, { "epoch": 0.93, "learning_rate": 1.306459666802851e-06, "loss": 0.8039, "step": 20590 }, { "epoch": 0.94, "learning_rate": 1.2973807254074175e-06, "loss": 0.6371, "step": 20600 }, { "epoch": 0.94, "learning_rate": 1.2883017840119844e-06, "loss": 0.6227, "step": 20610 }, { "epoch": 0.94, "learning_rate": 1.2792228426165509e-06, "loss": 0.6477, "step": 20620 }, { "epoch": 0.94, "learning_rate": 1.2701439012211178e-06, "loss": 0.7176, "step": 20630 }, { "epoch": 0.94, "learning_rate": 1.2610649598256845e-06, "loss": 0.6961, "step": 20640 }, { "epoch": 0.94, "learning_rate": 1.2519860184302512e-06, "loss": 0.5751, "step": 20650 }, { "epoch": 0.94, "learning_rate": 1.2429070770348179e-06, "loss": 0.6572, "step": 20660 }, { "epoch": 0.94, "learning_rate": 1.2338281356393846e-06, "loss": 0.6223, "step": 20670 }, { "epoch": 0.94, "learning_rate": 1.2247491942439513e-06, "loss": 0.768, "step": 20680 }, { "epoch": 0.94, "learning_rate": 1.215670252848518e-06, "loss": 0.5842, "step": 20690 }, { "epoch": 0.94, "learning_rate": 1.2065913114530846e-06, "loss": 0.6072, "step": 20700 }, { "epoch": 0.94, "learning_rate": 1.1975123700576513e-06, "loss": 0.6664, "step": 20710 }, { "epoch": 0.94, "learning_rate": 1.188433428662218e-06, "loss": 0.6254, "step": 20720 }, { "epoch": 0.94, "learning_rate": 1.1793544872667847e-06, "loss": 0.6463, "step": 20730 }, { "epoch": 0.94, "learning_rate": 1.1702755458713514e-06, "loss": 0.7381, "step": 20740 }, { "epoch": 0.94, "learning_rate": 1.1611966044759181e-06, "loss": 0.6648, "step": 20750 }, { "epoch": 0.94, "learning_rate": 1.1521176630804848e-06, "loss": 0.7143, "step": 20760 }, { "epoch": 0.94, "learning_rate": 1.1430387216850517e-06, "loss": 0.6377, "step": 20770 }, { "epoch": 0.94, "learning_rate": 1.1339597802896184e-06, "loss": 0.6785, "step": 20780 }, { "epoch": 0.94, "learning_rate": 1.1248808388941851e-06, "loss": 0.7199, "step": 20790 }, { "epoch": 0.94, "learning_rate": 1.1158018974987516e-06, "loss": 0.6695, "step": 20800 }, { "epoch": 0.94, "learning_rate": 1.1067229561033183e-06, "loss": 0.6596, "step": 20810 }, { "epoch": 0.95, "learning_rate": 1.0976440147078852e-06, "loss": 0.7838, "step": 20820 }, { "epoch": 0.95, "learning_rate": 1.088565073312452e-06, "loss": 0.6062, "step": 20830 }, { "epoch": 0.95, "learning_rate": 1.0794861319170186e-06, "loss": 0.6529, "step": 20840 }, { "epoch": 0.95, "learning_rate": 1.0704071905215853e-06, "loss": 0.7447, "step": 20850 }, { "epoch": 0.95, "learning_rate": 1.061328249126152e-06, "loss": 0.7277, "step": 20860 }, { "epoch": 0.95, "learning_rate": 1.0522493077307187e-06, "loss": 0.6977, "step": 20870 }, { "epoch": 0.95, "learning_rate": 1.0431703663352854e-06, "loss": 0.5682, "step": 20880 }, { "epoch": 0.95, "learning_rate": 1.034091424939852e-06, "loss": 0.6207, "step": 20890 }, { "epoch": 0.95, "learning_rate": 1.0250124835444188e-06, "loss": 0.6986, "step": 20900 }, { "epoch": 0.95, "learning_rate": 1.0159335421489855e-06, "loss": 0.6393, "step": 20910 }, { "epoch": 0.95, "learning_rate": 1.0068546007535522e-06, "loss": 0.609, "step": 20920 }, { "epoch": 0.95, "learning_rate": 9.977756593581189e-07, "loss": 0.727, "step": 20930 }, { "epoch": 0.95, "learning_rate": 9.886967179626856e-07, "loss": 0.6385, "step": 20940 }, { "epoch": 0.95, "learning_rate": 9.796177765672525e-07, "loss": 0.7049, "step": 20950 }, { "epoch": 0.95, "learning_rate": 9.705388351718192e-07, "loss": 0.6059, "step": 20960 }, { "epoch": 0.95, "learning_rate": 9.614598937763857e-07, "loss": 0.6453, "step": 20970 }, { "epoch": 0.95, "learning_rate": 9.523809523809525e-07, "loss": 0.61, "step": 20980 }, { "epoch": 0.95, "learning_rate": 9.433020109855191e-07, "loss": 0.7059, "step": 20990 }, { "epoch": 0.95, "learning_rate": 9.342230695900858e-07, "loss": 0.6539, "step": 21000 }, { "epoch": 0.95, "eval_accuracy": 0.6279575084500242, "eval_loss": 0.6598912477493286, "eval_runtime": 73.9974, "eval_samples_per_second": 55.975, "eval_steps_per_second": 14.0, "step": 21000 }, { "epoch": 0.95, "learning_rate": 9.251441281946525e-07, "loss": 0.7365, "step": 21010 }, { "epoch": 0.95, "learning_rate": 9.160651867992193e-07, "loss": 0.7266, "step": 21020 }, { "epoch": 0.95, "learning_rate": 9.06986245403786e-07, "loss": 0.7639, "step": 21030 }, { "epoch": 0.96, "learning_rate": 8.979073040083527e-07, "loss": 0.6879, "step": 21040 }, { "epoch": 0.96, "learning_rate": 8.888283626129194e-07, "loss": 0.7623, "step": 21050 }, { "epoch": 0.96, "learning_rate": 8.79749421217486e-07, "loss": 0.6229, "step": 21060 }, { "epoch": 0.96, "learning_rate": 8.706704798220528e-07, "loss": 0.5939, "step": 21070 }, { "epoch": 0.96, "learning_rate": 8.615915384266195e-07, "loss": 0.6016, "step": 21080 }, { "epoch": 0.96, "learning_rate": 8.525125970311862e-07, "loss": 0.5709, "step": 21090 }, { "epoch": 0.96, "learning_rate": 8.434336556357529e-07, "loss": 0.6391, "step": 21100 }, { "epoch": 0.96, "learning_rate": 8.343547142403197e-07, "loss": 0.5955, "step": 21110 }, { "epoch": 0.96, "learning_rate": 8.252757728448864e-07, "loss": 0.6453, "step": 21120 }, { "epoch": 0.96, "learning_rate": 8.161968314494531e-07, "loss": 0.5959, "step": 21130 }, { "epoch": 0.96, "learning_rate": 8.071178900540198e-07, "loss": 0.7219, "step": 21140 }, { "epoch": 0.96, "learning_rate": 7.980389486585864e-07, "loss": 0.7617, "step": 21150 }, { "epoch": 0.96, "learning_rate": 7.889600072631531e-07, "loss": 0.6307, "step": 21160 }, { "epoch": 0.96, "learning_rate": 7.798810658677199e-07, "loss": 0.6355, "step": 21170 }, { "epoch": 0.96, "learning_rate": 7.708021244722866e-07, "loss": 0.6818, "step": 21180 }, { "epoch": 0.96, "learning_rate": 7.617231830768533e-07, "loss": 0.7445, "step": 21190 }, { "epoch": 0.96, "learning_rate": 7.5264424168142e-07, "loss": 0.6539, "step": 21200 }, { "epoch": 0.96, "learning_rate": 7.435653002859868e-07, "loss": 0.6465, "step": 21210 }, { "epoch": 0.96, "learning_rate": 7.344863588905535e-07, "loss": 0.6133, "step": 21220 }, { "epoch": 0.96, "learning_rate": 7.254074174951202e-07, "loss": 0.6699, "step": 21230 }, { "epoch": 0.96, "learning_rate": 7.163284760996868e-07, "loss": 0.6191, "step": 21240 }, { "epoch": 0.96, "learning_rate": 7.072495347042535e-07, "loss": 0.5492, "step": 21250 }, { "epoch": 0.97, "learning_rate": 6.981705933088203e-07, "loss": 0.8207, "step": 21260 }, { "epoch": 0.97, "learning_rate": 6.89091651913387e-07, "loss": 0.6326, "step": 21270 }, { "epoch": 0.97, "learning_rate": 6.800127105179537e-07, "loss": 0.7082, "step": 21280 }, { "epoch": 0.97, "learning_rate": 6.709337691225204e-07, "loss": 0.7359, "step": 21290 }, { "epoch": 0.97, "learning_rate": 6.618548277270872e-07, "loss": 0.7242, "step": 21300 }, { "epoch": 0.97, "learning_rate": 6.527758863316538e-07, "loss": 0.7369, "step": 21310 }, { "epoch": 0.97, "learning_rate": 6.436969449362204e-07, "loss": 0.6687, "step": 21320 }, { "epoch": 0.97, "learning_rate": 6.346180035407871e-07, "loss": 0.7807, "step": 21330 }, { "epoch": 0.97, "learning_rate": 6.255390621453538e-07, "loss": 0.6871, "step": 21340 }, { "epoch": 0.97, "learning_rate": 6.164601207499206e-07, "loss": 0.6143, "step": 21350 }, { "epoch": 0.97, "learning_rate": 6.073811793544873e-07, "loss": 0.6752, "step": 21360 }, { "epoch": 0.97, "learning_rate": 5.98302237959054e-07, "loss": 0.6838, "step": 21370 }, { "epoch": 0.97, "learning_rate": 5.892232965636207e-07, "loss": 0.5486, "step": 21380 }, { "epoch": 0.97, "learning_rate": 5.801443551681874e-07, "loss": 0.6605, "step": 21390 }, { "epoch": 0.97, "learning_rate": 5.710654137727541e-07, "loss": 0.6533, "step": 21400 }, { "epoch": 0.97, "learning_rate": 5.619864723773208e-07, "loss": 0.616, "step": 21410 }, { "epoch": 0.97, "learning_rate": 5.529075309818876e-07, "loss": 0.6836, "step": 21420 }, { "epoch": 0.97, "learning_rate": 5.438285895864542e-07, "loss": 0.6934, "step": 21430 }, { "epoch": 0.97, "learning_rate": 5.34749648191021e-07, "loss": 0.768, "step": 21440 }, { "epoch": 0.97, "learning_rate": 5.256707067955877e-07, "loss": 0.6928, "step": 21450 }, { "epoch": 0.97, "learning_rate": 5.165917654001544e-07, "loss": 0.6156, "step": 21460 }, { "epoch": 0.97, "learning_rate": 5.075128240047211e-07, "loss": 0.6945, "step": 21470 }, { "epoch": 0.98, "learning_rate": 4.984338826092878e-07, "loss": 0.6664, "step": 21480 }, { "epoch": 0.98, "learning_rate": 4.893549412138545e-07, "loss": 0.6572, "step": 21490 }, { "epoch": 0.98, "learning_rate": 4.802759998184212e-07, "loss": 0.6396, "step": 21500 }, { "epoch": 0.98, "eval_accuracy": 0.6253017865765331, "eval_loss": 0.6602552533149719, "eval_runtime": 73.7722, "eval_samples_per_second": 56.146, "eval_steps_per_second": 14.043, "step": 21500 }, { "epoch": 0.98, "learning_rate": 4.7119705842298793e-07, "loss": 0.6551, "step": 21510 }, { "epoch": 0.98, "learning_rate": 4.6211811702755463e-07, "loss": 0.6627, "step": 21520 }, { "epoch": 0.98, "learning_rate": 4.530391756321213e-07, "loss": 0.7145, "step": 21530 }, { "epoch": 0.98, "learning_rate": 4.4396023423668807e-07, "loss": 0.6893, "step": 21540 }, { "epoch": 0.98, "learning_rate": 4.3488129284125477e-07, "loss": 0.6969, "step": 21550 }, { "epoch": 0.98, "learning_rate": 4.258023514458214e-07, "loss": 0.7125, "step": 21560 }, { "epoch": 0.98, "learning_rate": 4.1672341005038816e-07, "loss": 0.6242, "step": 21570 }, { "epoch": 0.98, "learning_rate": 4.0764446865495486e-07, "loss": 0.635, "step": 21580 }, { "epoch": 0.98, "learning_rate": 3.985655272595216e-07, "loss": 0.7063, "step": 21590 }, { "epoch": 0.98, "learning_rate": 3.8948658586408825e-07, "loss": 0.775, "step": 21600 }, { "epoch": 0.98, "learning_rate": 3.8040764446865495e-07, "loss": 0.5451, "step": 21610 }, { "epoch": 0.98, "learning_rate": 3.713287030732217e-07, "loss": 0.5568, "step": 21620 }, { "epoch": 0.98, "learning_rate": 3.622497616777884e-07, "loss": 0.7299, "step": 21630 }, { "epoch": 0.98, "learning_rate": 3.5317082028235514e-07, "loss": 0.6406, "step": 21640 }, { "epoch": 0.98, "learning_rate": 3.440918788869218e-07, "loss": 0.6998, "step": 21650 }, { "epoch": 0.98, "learning_rate": 3.3501293749148853e-07, "loss": 0.6822, "step": 21660 }, { "epoch": 0.98, "learning_rate": 3.2593399609605523e-07, "loss": 0.6109, "step": 21670 }, { "epoch": 0.98, "learning_rate": 3.16855054700622e-07, "loss": 0.6658, "step": 21680 }, { "epoch": 0.98, "learning_rate": 3.077761133051886e-07, "loss": 0.6207, "step": 21690 }, { "epoch": 0.99, "learning_rate": 2.986971719097553e-07, "loss": 0.7873, "step": 21700 }, { "epoch": 0.99, "learning_rate": 2.8961823051432207e-07, "loss": 0.6695, "step": 21710 }, { "epoch": 0.99, "learning_rate": 2.8053928911888876e-07, "loss": 0.6246, "step": 21720 }, { "epoch": 0.99, "learning_rate": 2.7146034772345546e-07, "loss": 0.6021, "step": 21730 }, { "epoch": 0.99, "learning_rate": 2.623814063280222e-07, "loss": 0.6416, "step": 21740 }, { "epoch": 0.99, "learning_rate": 2.533024649325889e-07, "loss": 0.6463, "step": 21750 }, { "epoch": 0.99, "learning_rate": 2.442235235371556e-07, "loss": 0.6547, "step": 21760 }, { "epoch": 0.99, "learning_rate": 2.351445821417223e-07, "loss": 0.6244, "step": 21770 }, { "epoch": 0.99, "learning_rate": 2.2606564074628902e-07, "loss": 0.6291, "step": 21780 }, { "epoch": 0.99, "learning_rate": 2.1698669935085571e-07, "loss": 0.7139, "step": 21790 }, { "epoch": 0.99, "learning_rate": 2.0790775795542244e-07, "loss": 0.6533, "step": 21800 }, { "epoch": 0.99, "learning_rate": 1.988288165599891e-07, "loss": 0.7258, "step": 21810 }, { "epoch": 0.99, "learning_rate": 1.8974987516455583e-07, "loss": 0.6953, "step": 21820 }, { "epoch": 0.99, "learning_rate": 1.8067093376912252e-07, "loss": 0.6939, "step": 21830 }, { "epoch": 0.99, "learning_rate": 1.7159199237368925e-07, "loss": 0.7047, "step": 21840 }, { "epoch": 0.99, "learning_rate": 1.6251305097825594e-07, "loss": 0.6329, "step": 21850 }, { "epoch": 0.99, "learning_rate": 1.5343410958282267e-07, "loss": 0.6727, "step": 21860 }, { "epoch": 0.99, "learning_rate": 1.4435516818738936e-07, "loss": 0.7387, "step": 21870 }, { "epoch": 0.99, "learning_rate": 1.3527622679195606e-07, "loss": 0.7191, "step": 21880 }, { "epoch": 0.99, "learning_rate": 1.2619728539652278e-07, "loss": 0.7266, "step": 21890 }, { "epoch": 0.99, "learning_rate": 1.1711834400108948e-07, "loss": 0.6119, "step": 21900 }, { "epoch": 0.99, "learning_rate": 1.0803940260565619e-07, "loss": 0.6201, "step": 21910 }, { "epoch": 1.0, "learning_rate": 9.896046121022288e-08, "loss": 0.7092, "step": 21920 }, { "epoch": 1.0, "learning_rate": 8.98815198147896e-08, "loss": 0.6348, "step": 21930 }, { "epoch": 1.0, "learning_rate": 8.080257841935631e-08, "loss": 0.7164, "step": 21940 }, { "epoch": 1.0, "learning_rate": 7.172363702392301e-08, "loss": 0.6273, "step": 21950 }, { "epoch": 1.0, "learning_rate": 6.264469562848973e-08, "loss": 0.6154, "step": 21960 }, { "epoch": 1.0, "learning_rate": 5.3565754233056436e-08, "loss": 0.7488, "step": 21970 }, { "epoch": 1.0, "learning_rate": 4.448681283762314e-08, "loss": 0.6797, "step": 21980 }, { "epoch": 1.0, "learning_rate": 3.540787144218984e-08, "loss": 0.6977, "step": 21990 }, { "epoch": 1.0, "learning_rate": 2.6328930046756547e-08, "loss": 0.632, "step": 22000 }, { "epoch": 1.0, "eval_accuracy": 0.6269917914051183, "eval_loss": 0.6599072813987732, "eval_runtime": 73.7894, "eval_samples_per_second": 56.133, "eval_steps_per_second": 14.04, "step": 22000 } ], "max_steps": 22029, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }