{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 3185240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999998430259573e-05, "loss": 11.0257, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.999215129786139e-05, "loss": 8.1808, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.998430259572278e-05, "loss": 7.356, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.997645389358416e-05, "loss": 6.9237, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.996860519144554e-05, "loss": 6.581, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.996075648930693e-05, "loss": 6.3241, "step": 2500 }, { "epoch": 0.02, "learning_rate": 4.995290778716832e-05, "loss": 6.0972, "step": 3000 }, { "epoch": 0.02, "learning_rate": 4.99450590850297e-05, "loss": 5.9098, "step": 3500 }, { "epoch": 0.03, "learning_rate": 4.993721038289109e-05, "loss": 5.7506, "step": 4000 }, { "epoch": 0.03, "learning_rate": 4.9929361680752475e-05, "loss": 5.6173, "step": 4500 }, { "epoch": 0.03, "learning_rate": 4.992151297861386e-05, "loss": 5.4953, "step": 5000 }, { "epoch": 0.03, "learning_rate": 4.9913664276475245e-05, "loss": 5.392, "step": 5500 }, { "epoch": 0.04, "learning_rate": 4.9905815574336626e-05, "loss": 5.2994, "step": 6000 }, { "epoch": 0.04, "learning_rate": 4.9897982569602294e-05, "loss": 5.2102, "step": 6500 }, { "epoch": 0.04, "learning_rate": 4.9890133867463676e-05, "loss": 5.135, "step": 7000 }, { "epoch": 0.05, "learning_rate": 4.9882285165325064e-05, "loss": 5.0578, "step": 7500 }, { "epoch": 0.05, "learning_rate": 4.987443646318645e-05, "loss": 4.9953, "step": 8000 }, { "epoch": 0.05, "learning_rate": 4.986658776104783e-05, "loss": 4.9339, "step": 8500 }, { "epoch": 0.06, "learning_rate": 4.985873905890922e-05, "loss": 4.876, "step": 9000 }, { "epoch": 0.06, "learning_rate": 4.985089035677061e-05, "loss": 4.8115, "step": 9500 }, { "epoch": 0.06, "learning_rate": 4.9843041654632e-05, "loss": 4.7707, "step": 10000 }, { "epoch": 0.07, "learning_rate": 4.983519295249338e-05, "loss": 4.7185, "step": 10500 }, { "epoch": 0.07, "learning_rate": 4.982734425035476e-05, "loss": 4.6767, "step": 11000 }, { "epoch": 0.07, "learning_rate": 4.981949554821615e-05, "loss": 4.6359, "step": 11500 }, { "epoch": 0.08, "learning_rate": 4.981166254348181e-05, "loss": 4.6034, "step": 12000 }, { "epoch": 0.08, "learning_rate": 4.98038138413432e-05, "loss": 4.5651, "step": 12500 }, { "epoch": 0.08, "learning_rate": 4.979596513920458e-05, "loss": 4.5314, "step": 13000 }, { "epoch": 0.08, "learning_rate": 4.978811643706597e-05, "loss": 4.4995, "step": 13500 }, { "epoch": 0.09, "learning_rate": 4.9780267734927356e-05, "loss": 4.4758, "step": 14000 }, { "epoch": 0.09, "learning_rate": 4.9772419032788744e-05, "loss": 4.4467, "step": 14500 }, { "epoch": 0.09, "learning_rate": 4.976457033065013e-05, "loss": 4.4235, "step": 15000 }, { "epoch": 0.1, "learning_rate": 4.975672162851151e-05, "loss": 4.4027, "step": 15500 }, { "epoch": 0.1, "learning_rate": 4.9748888623777175e-05, "loss": 4.3774, "step": 16000 }, { "epoch": 0.1, "learning_rate": 4.974103992163856e-05, "loss": 4.3599, "step": 16500 }, { "epoch": 0.11, "learning_rate": 4.9733206916904224e-05, "loss": 4.3373, "step": 17000 }, { "epoch": 0.11, "learning_rate": 4.972535821476561e-05, "loss": 4.3081, "step": 17500 }, { "epoch": 0.11, "learning_rate": 4.9717509512626993e-05, "loss": 4.292, "step": 18000 }, { "epoch": 0.12, "learning_rate": 4.970966081048838e-05, "loss": 4.2785, "step": 18500 }, { "epoch": 0.12, "learning_rate": 4.970182780575404e-05, "loss": 4.2639, "step": 19000 }, { "epoch": 0.12, "learning_rate": 4.969397910361543e-05, "loss": 4.2458, "step": 19500 }, { "epoch": 0.13, "learning_rate": 4.968613040147682e-05, "loss": 4.2364, "step": 20000 }, { "epoch": 0.13, "learning_rate": 4.9678281699338194e-05, "loss": 4.2189, "step": 20500 }, { "epoch": 0.13, "learning_rate": 4.967043299719958e-05, "loss": 4.1994, "step": 21000 }, { "epoch": 0.13, "learning_rate": 4.966258429506097e-05, "loss": 4.1906, "step": 21500 }, { "epoch": 0.14, "learning_rate": 4.965473559292236e-05, "loss": 4.1752, "step": 22000 }, { "epoch": 0.14, "learning_rate": 4.9646886890783746e-05, "loss": 4.1668, "step": 22500 }, { "epoch": 0.14, "learning_rate": 4.96390538860494e-05, "loss": 4.1451, "step": 23000 }, { "epoch": 0.15, "learning_rate": 4.963120518391079e-05, "loss": 4.1388, "step": 23500 }, { "epoch": 0.15, "learning_rate": 4.962335648177218e-05, "loss": 4.1323, "step": 24000 }, { "epoch": 0.15, "learning_rate": 4.9615507779633565e-05, "loss": 4.1146, "step": 24500 }, { "epoch": 0.16, "learning_rate": 4.9607659077494953e-05, "loss": 4.1098, "step": 25000 }, { "epoch": 0.16, "learning_rate": 4.9599826072760615e-05, "loss": 4.1038, "step": 25500 }, { "epoch": 0.16, "learning_rate": 4.9591977370621996e-05, "loss": 4.0868, "step": 26000 }, { "epoch": 0.17, "learning_rate": 4.958414436588766e-05, "loss": 4.0839, "step": 26500 }, { "epoch": 0.17, "learning_rate": 4.9576295663749046e-05, "loss": 4.0741, "step": 27000 }, { "epoch": 0.17, "learning_rate": 4.9568446961610434e-05, "loss": 4.0611, "step": 27500 }, { "epoch": 0.18, "learning_rate": 4.956059825947182e-05, "loss": 4.0544, "step": 28000 }, { "epoch": 0.18, "learning_rate": 4.95527495573332e-05, "loss": 4.0453, "step": 28500 }, { "epoch": 0.18, "learning_rate": 4.9544900855194585e-05, "loss": 4.0321, "step": 29000 }, { "epoch": 0.19, "learning_rate": 4.953706785046025e-05, "loss": 4.0287, "step": 29500 }, { "epoch": 0.19, "learning_rate": 4.9529219148321634e-05, "loss": 4.0159, "step": 30000 }, { "epoch": 0.19, "eval_accuracy": 0.3463794149112016, "eval_loss": 3.907853841781616, "eval_runtime": 1454.1456, "eval_samples_per_second": 57.629, "eval_steps_per_second": 5.764, "step": 30000 }, { "epoch": 0.19, "learning_rate": 4.952137044618302e-05, "loss": 4.0094, "step": 30500 }, { "epoch": 0.19, "learning_rate": 4.9513521744044404e-05, "loss": 3.9965, "step": 31000 }, { "epoch": 0.2, "learning_rate": 4.950567304190579e-05, "loss": 3.9888, "step": 31500 }, { "epoch": 0.2, "learning_rate": 4.949782433976718e-05, "loss": 3.9859, "step": 32000 }, { "epoch": 0.2, "learning_rate": 4.948997563762857e-05, "loss": 3.9804, "step": 32500 }, { "epoch": 0.21, "learning_rate": 4.948214263289423e-05, "loss": 3.9736, "step": 33000 }, { "epoch": 0.21, "learning_rate": 4.947429393075561e-05, "loss": 3.9596, "step": 33500 }, { "epoch": 0.21, "learning_rate": 4.9466445228617e-05, "loss": 3.9614, "step": 34000 }, { "epoch": 0.22, "learning_rate": 4.945859652647839e-05, "loss": 3.9499, "step": 34500 }, { "epoch": 0.22, "learning_rate": 4.945074782433977e-05, "loss": 3.9473, "step": 35000 }, { "epoch": 0.22, "learning_rate": 4.944289912220115e-05, "loss": 3.9313, "step": 35500 }, { "epoch": 0.23, "learning_rate": 4.943506611746682e-05, "loss": 3.9349, "step": 36000 }, { "epoch": 0.23, "learning_rate": 4.9427217415328206e-05, "loss": 3.9283, "step": 36500 }, { "epoch": 0.23, "learning_rate": 4.941936871318959e-05, "loss": 3.9194, "step": 37000 }, { "epoch": 0.24, "learning_rate": 4.9411520011050975e-05, "loss": 3.9118, "step": 37500 }, { "epoch": 0.24, "learning_rate": 4.940367130891236e-05, "loss": 3.9089, "step": 38000 }, { "epoch": 0.24, "learning_rate": 4.9395822606773745e-05, "loss": 3.8998, "step": 38500 }, { "epoch": 0.24, "learning_rate": 4.938797390463513e-05, "loss": 3.9022, "step": 39000 }, { "epoch": 0.25, "learning_rate": 4.938012520249652e-05, "loss": 3.8869, "step": 39500 }, { "epoch": 0.25, "learning_rate": 4.93722765003579e-05, "loss": 3.8878, "step": 40000 }, { "epoch": 0.25, "learning_rate": 4.9364443495623564e-05, "loss": 3.8821, "step": 40500 }, { "epoch": 0.26, "learning_rate": 4.935659479348495e-05, "loss": 3.8694, "step": 41000 }, { "epoch": 0.26, "learning_rate": 4.9348761788750613e-05, "loss": 3.8705, "step": 41500 }, { "epoch": 0.26, "learning_rate": 4.9340913086612e-05, "loss": 3.8675, "step": 42000 }, { "epoch": 0.27, "learning_rate": 4.933306438447339e-05, "loss": 3.86, "step": 42500 }, { "epoch": 0.27, "learning_rate": 4.932521568233477e-05, "loss": 3.844, "step": 43000 }, { "epoch": 0.27, "learning_rate": 4.931736698019615e-05, "loss": 3.8564, "step": 43500 }, { "epoch": 0.28, "learning_rate": 4.930951827805754e-05, "loss": 3.8462, "step": 44000 }, { "epoch": 0.28, "learning_rate": 4.930168527332321e-05, "loss": 3.8353, "step": 44500 }, { "epoch": 0.28, "learning_rate": 4.929383657118459e-05, "loss": 3.8342, "step": 45000 }, { "epoch": 0.29, "learning_rate": 4.928598786904597e-05, "loss": 3.8284, "step": 45500 }, { "epoch": 0.29, "learning_rate": 4.927813916690736e-05, "loss": 3.8273, "step": 46000 }, { "epoch": 0.29, "learning_rate": 4.927029046476875e-05, "loss": 3.8228, "step": 46500 }, { "epoch": 0.3, "learning_rate": 4.9262441762630136e-05, "loss": 3.8139, "step": 47000 }, { "epoch": 0.3, "learning_rate": 4.9254593060491524e-05, "loss": 3.8111, "step": 47500 }, { "epoch": 0.3, "learning_rate": 4.9246744358352905e-05, "loss": 3.8038, "step": 48000 }, { "epoch": 0.3, "learning_rate": 4.923891135361857e-05, "loss": 3.806, "step": 48500 }, { "epoch": 0.31, "learning_rate": 4.9231062651479955e-05, "loss": 3.7963, "step": 49000 }, { "epoch": 0.31, "learning_rate": 4.922321394934134e-05, "loss": 3.7946, "step": 49500 }, { "epoch": 0.31, "learning_rate": 4.9215365247202724e-05, "loss": 3.7958, "step": 50000 }, { "epoch": 0.32, "learning_rate": 4.920753224246839e-05, "loss": 3.7856, "step": 50500 }, { "epoch": 0.32, "learning_rate": 4.919969923773405e-05, "loss": 3.7828, "step": 51000 }, { "epoch": 0.32, "learning_rate": 4.9191850535595435e-05, "loss": 3.7764, "step": 51500 }, { "epoch": 0.33, "learning_rate": 4.918400183345682e-05, "loss": 3.7753, "step": 52000 }, { "epoch": 0.33, "learning_rate": 4.917615313131821e-05, "loss": 3.7715, "step": 52500 }, { "epoch": 0.33, "learning_rate": 4.916830442917959e-05, "loss": 3.7687, "step": 53000 }, { "epoch": 0.34, "learning_rate": 4.9160455727040974e-05, "loss": 3.7584, "step": 53500 }, { "epoch": 0.34, "learning_rate": 4.915260702490236e-05, "loss": 3.7594, "step": 54000 }, { "epoch": 0.34, "learning_rate": 4.914477402016803e-05, "loss": 3.7547, "step": 54500 }, { "epoch": 0.35, "learning_rate": 4.913692531802941e-05, "loss": 3.7557, "step": 55000 }, { "epoch": 0.35, "learning_rate": 4.91290766158908e-05, "loss": 3.7503, "step": 55500 }, { "epoch": 0.35, "learning_rate": 4.912122791375218e-05, "loss": 3.743, "step": 56000 }, { "epoch": 0.35, "learning_rate": 4.911339490901784e-05, "loss": 3.7398, "step": 56500 }, { "epoch": 0.36, "learning_rate": 4.910554620687923e-05, "loss": 3.7413, "step": 57000 }, { "epoch": 0.36, "learning_rate": 4.909769750474062e-05, "loss": 3.7408, "step": 57500 }, { "epoch": 0.36, "learning_rate": 4.908984880260201e-05, "loss": 3.7349, "step": 58000 }, { "epoch": 0.37, "learning_rate": 4.908200010046339e-05, "loss": 3.7308, "step": 58500 }, { "epoch": 0.37, "learning_rate": 4.907416709572905e-05, "loss": 3.734, "step": 59000 }, { "epoch": 0.37, "learning_rate": 4.906631839359044e-05, "loss": 3.7228, "step": 59500 }, { "epoch": 0.38, "learning_rate": 4.9058469691451826e-05, "loss": 3.7138, "step": 60000 }, { "epoch": 0.38, "eval_accuracy": 0.37633594403107123, "eval_loss": 3.622432231903076, "eval_runtime": 1449.9239, "eval_samples_per_second": 57.797, "eval_steps_per_second": 5.78, "step": 60000 }, { "epoch": 0.38, "learning_rate": 4.9050620989313214e-05, "loss": 3.7188, "step": 60500 }, { "epoch": 0.38, "learning_rate": 4.9042787984578875e-05, "loss": 3.7128, "step": 61000 }, { "epoch": 0.39, "learning_rate": 4.903493928244026e-05, "loss": 3.7047, "step": 61500 }, { "epoch": 0.39, "learning_rate": 4.9027090580301645e-05, "loss": 3.7105, "step": 62000 }, { "epoch": 0.39, "learning_rate": 4.901924187816303e-05, "loss": 3.7127, "step": 62500 }, { "epoch": 0.4, "learning_rate": 4.9011393176024414e-05, "loss": 3.7051, "step": 63000 }, { "epoch": 0.4, "learning_rate": 4.900356017129008e-05, "loss": 3.7007, "step": 63500 }, { "epoch": 0.4, "learning_rate": 4.8995711469151464e-05, "loss": 3.694, "step": 64000 }, { "epoch": 0.4, "learning_rate": 4.898786276701285e-05, "loss": 3.6968, "step": 64500 }, { "epoch": 0.41, "learning_rate": 4.898001406487423e-05, "loss": 3.6896, "step": 65000 }, { "epoch": 0.41, "learning_rate": 4.897216536273562e-05, "loss": 3.6922, "step": 65500 }, { "epoch": 0.41, "learning_rate": 4.896433235800128e-05, "loss": 3.6851, "step": 66000 }, { "epoch": 0.42, "learning_rate": 4.8956483655862664e-05, "loss": 3.6823, "step": 66500 }, { "epoch": 0.42, "learning_rate": 4.894863495372405e-05, "loss": 3.6847, "step": 67000 }, { "epoch": 0.42, "learning_rate": 4.894078625158544e-05, "loss": 3.6762, "step": 67500 }, { "epoch": 0.43, "learning_rate": 4.893293754944683e-05, "loss": 3.6759, "step": 68000 }, { "epoch": 0.43, "learning_rate": 4.892510454471249e-05, "loss": 3.6687, "step": 68500 }, { "epoch": 0.43, "learning_rate": 4.891725584257387e-05, "loss": 3.6666, "step": 69000 }, { "epoch": 0.44, "learning_rate": 4.890940714043526e-05, "loss": 3.6666, "step": 69500 }, { "epoch": 0.44, "learning_rate": 4.890155843829665e-05, "loss": 3.6734, "step": 70000 }, { "epoch": 0.44, "learning_rate": 4.889372543356231e-05, "loss": 3.6629, "step": 70500 }, { "epoch": 0.45, "learning_rate": 4.88858767314237e-05, "loss": 3.667, "step": 71000 }, { "epoch": 0.45, "learning_rate": 4.887802802928508e-05, "loss": 3.667, "step": 71500 }, { "epoch": 0.45, "learning_rate": 4.8870179327146467e-05, "loss": 3.6527, "step": 72000 }, { "epoch": 0.46, "learning_rate": 4.8862330625007855e-05, "loss": 3.6514, "step": 72500 }, { "epoch": 0.46, "learning_rate": 4.8854497620273516e-05, "loss": 3.6537, "step": 73000 }, { "epoch": 0.46, "learning_rate": 4.8846648918134904e-05, "loss": 3.6431, "step": 73500 }, { "epoch": 0.46, "learning_rate": 4.8838815913400566e-05, "loss": 3.6414, "step": 74000 }, { "epoch": 0.47, "learning_rate": 4.883096721126195e-05, "loss": 3.645, "step": 74500 }, { "epoch": 0.47, "learning_rate": 4.8823118509123335e-05, "loss": 3.6399, "step": 75000 }, { "epoch": 0.47, "learning_rate": 4.881526980698472e-05, "loss": 3.6429, "step": 75500 }, { "epoch": 0.48, "learning_rate": 4.8807421104846105e-05, "loss": 3.6377, "step": 76000 }, { "epoch": 0.48, "learning_rate": 4.8799572402707486e-05, "loss": 3.6395, "step": 76500 }, { "epoch": 0.48, "learning_rate": 4.8791723700568874e-05, "loss": 3.6344, "step": 77000 }, { "epoch": 0.49, "learning_rate": 4.878387499843026e-05, "loss": 3.6296, "step": 77500 }, { "epoch": 0.49, "learning_rate": 4.877602629629165e-05, "loss": 3.6277, "step": 78000 }, { "epoch": 0.49, "learning_rate": 4.876817759415303e-05, "loss": 3.6268, "step": 78500 }, { "epoch": 0.5, "learning_rate": 4.876034458941869e-05, "loss": 3.6278, "step": 79000 }, { "epoch": 0.5, "learning_rate": 4.875249588728008e-05, "loss": 3.623, "step": 79500 }, { "epoch": 0.5, "learning_rate": 4.874464718514147e-05, "loss": 3.6189, "step": 80000 }, { "epoch": 0.51, "learning_rate": 4.873679848300286e-05, "loss": 3.6199, "step": 80500 }, { "epoch": 0.51, "learning_rate": 4.872896547826852e-05, "loss": 3.6157, "step": 81000 }, { "epoch": 0.51, "learning_rate": 4.87211167761299e-05, "loss": 3.6175, "step": 81500 }, { "epoch": 0.51, "learning_rate": 4.871326807399129e-05, "loss": 3.6142, "step": 82000 }, { "epoch": 0.52, "learning_rate": 4.8705419371852676e-05, "loss": 3.607, "step": 82500 }, { "epoch": 0.52, "learning_rate": 4.869757066971406e-05, "loss": 3.6096, "step": 83000 }, { "epoch": 0.52, "learning_rate": 4.8689721967575446e-05, "loss": 3.6056, "step": 83500 }, { "epoch": 0.53, "learning_rate": 4.868187326543683e-05, "loss": 3.6029, "step": 84000 }, { "epoch": 0.53, "learning_rate": 4.8674024563298215e-05, "loss": 3.6062, "step": 84500 }, { "epoch": 0.53, "learning_rate": 4.866619155856388e-05, "loss": 3.6, "step": 85000 }, { "epoch": 0.54, "learning_rate": 4.8658358553829545e-05, "loss": 3.5978, "step": 85500 }, { "epoch": 0.54, "learning_rate": 4.8650509851690926e-05, "loss": 3.5973, "step": 86000 }, { "epoch": 0.54, "learning_rate": 4.864266114955231e-05, "loss": 3.5962, "step": 86500 }, { "epoch": 0.55, "learning_rate": 4.8634812447413696e-05, "loss": 3.5949, "step": 87000 }, { "epoch": 0.55, "learning_rate": 4.8626963745275084e-05, "loss": 3.5956, "step": 87500 }, { "epoch": 0.55, "learning_rate": 4.8619130740540745e-05, "loss": 3.5835, "step": 88000 }, { "epoch": 0.56, "learning_rate": 4.861128203840213e-05, "loss": 3.5887, "step": 88500 }, { "epoch": 0.56, "learning_rate": 4.8603433336263515e-05, "loss": 3.5943, "step": 89000 }, { "epoch": 0.56, "learning_rate": 4.85955846341249e-05, "loss": 3.5845, "step": 89500 }, { "epoch": 0.57, "learning_rate": 4.858773593198629e-05, "loss": 3.5781, "step": 90000 }, { "epoch": 0.57, "eval_accuracy": 0.39085216813098267, "eval_loss": 3.487050771713257, "eval_runtime": 1453.1684, "eval_samples_per_second": 57.668, "eval_steps_per_second": 5.767, "step": 90000 }, { "epoch": 0.57, "learning_rate": 4.857988722984768e-05, "loss": 3.5773, "step": 90500 }, { "epoch": 0.57, "learning_rate": 4.857205422511334e-05, "loss": 3.5817, "step": 91000 }, { "epoch": 0.57, "learning_rate": 4.856420552297472e-05, "loss": 3.5737, "step": 91500 }, { "epoch": 0.58, "learning_rate": 4.855635682083611e-05, "loss": 3.5732, "step": 92000 }, { "epoch": 0.58, "learning_rate": 4.854850811869749e-05, "loss": 3.5719, "step": 92500 }, { "epoch": 0.58, "learning_rate": 4.854065941655888e-05, "loss": 3.5786, "step": 93000 }, { "epoch": 0.59, "learning_rate": 4.853281071442027e-05, "loss": 3.5774, "step": 93500 }, { "epoch": 0.59, "learning_rate": 4.852496201228165e-05, "loss": 3.5708, "step": 94000 }, { "epoch": 0.59, "learning_rate": 4.851711331014304e-05, "loss": 3.5668, "step": 94500 }, { "epoch": 0.6, "learning_rate": 4.85092803054087e-05, "loss": 3.568, "step": 95000 }, { "epoch": 0.6, "learning_rate": 4.8501431603270087e-05, "loss": 3.564, "step": 95500 }, { "epoch": 0.6, "learning_rate": 4.849359859853575e-05, "loss": 3.5689, "step": 96000 }, { "epoch": 0.61, "learning_rate": 4.8485749896397136e-05, "loss": 3.5631, "step": 96500 }, { "epoch": 0.61, "learning_rate": 4.847790119425852e-05, "loss": 3.5602, "step": 97000 }, { "epoch": 0.61, "learning_rate": 4.8470052492119905e-05, "loss": 3.562, "step": 97500 }, { "epoch": 0.62, "learning_rate": 4.8462203789981294e-05, "loss": 3.5605, "step": 98000 }, { "epoch": 0.62, "learning_rate": 4.845435508784268e-05, "loss": 3.5563, "step": 98500 }, { "epoch": 0.62, "learning_rate": 4.844650638570406e-05, "loss": 3.5542, "step": 99000 }, { "epoch": 0.62, "learning_rate": 4.8438657683565444e-05, "loss": 3.5577, "step": 99500 }, { "epoch": 0.63, "learning_rate": 4.843082467883111e-05, "loss": 3.5476, "step": 100000 }, { "epoch": 0.63, "learning_rate": 4.8422975976692494e-05, "loss": 3.5471, "step": 100500 }, { "epoch": 0.63, "learning_rate": 4.841512727455388e-05, "loss": 3.5446, "step": 101000 }, { "epoch": 0.64, "learning_rate": 4.840729426981955e-05, "loss": 3.5477, "step": 101500 }, { "epoch": 0.64, "learning_rate": 4.839944556768093e-05, "loss": 3.5495, "step": 102000 }, { "epoch": 0.64, "learning_rate": 4.839159686554231e-05, "loss": 3.5497, "step": 102500 }, { "epoch": 0.65, "learning_rate": 4.83837481634037e-05, "loss": 3.5399, "step": 103000 }, { "epoch": 0.65, "learning_rate": 4.837591515866937e-05, "loss": 3.5386, "step": 103500 }, { "epoch": 0.65, "learning_rate": 4.836806645653075e-05, "loss": 3.5402, "step": 104000 }, { "epoch": 0.66, "learning_rate": 4.836021775439213e-05, "loss": 3.537, "step": 104500 }, { "epoch": 0.66, "learning_rate": 4.835236905225352e-05, "loss": 3.5427, "step": 105000 }, { "epoch": 0.66, "learning_rate": 4.834452035011491e-05, "loss": 3.5388, "step": 105500 }, { "epoch": 0.67, "learning_rate": 4.8336671647976296e-05, "loss": 3.542, "step": 106000 }, { "epoch": 0.67, "learning_rate": 4.832883864324196e-05, "loss": 3.5329, "step": 106500 }, { "epoch": 0.67, "learning_rate": 4.832098994110334e-05, "loss": 3.5341, "step": 107000 }, { "epoch": 0.67, "learning_rate": 4.831314123896473e-05, "loss": 3.5299, "step": 107500 }, { "epoch": 0.68, "learning_rate": 4.8305292536826115e-05, "loss": 3.5208, "step": 108000 }, { "epoch": 0.68, "learning_rate": 4.8297443834687503e-05, "loss": 3.5272, "step": 108500 }, { "epoch": 0.68, "learning_rate": 4.8289595132548885e-05, "loss": 3.5222, "step": 109000 }, { "epoch": 0.69, "learning_rate": 4.8281762127814546e-05, "loss": 3.5218, "step": 109500 }, { "epoch": 0.69, "learning_rate": 4.8273913425675934e-05, "loss": 3.5272, "step": 110000 }, { "epoch": 0.69, "learning_rate": 4.8266064723537316e-05, "loss": 3.5286, "step": 110500 }, { "epoch": 0.7, "learning_rate": 4.8258216021398704e-05, "loss": 3.5236, "step": 111000 }, { "epoch": 0.7, "learning_rate": 4.8250367319260085e-05, "loss": 3.5194, "step": 111500 }, { "epoch": 0.7, "learning_rate": 4.824251861712147e-05, "loss": 3.522, "step": 112000 }, { "epoch": 0.71, "learning_rate": 4.823466991498286e-05, "loss": 3.5215, "step": 112500 }, { "epoch": 0.71, "learning_rate": 4.822683691024852e-05, "loss": 3.5188, "step": 113000 }, { "epoch": 0.71, "learning_rate": 4.821898820810991e-05, "loss": 3.5164, "step": 113500 }, { "epoch": 0.72, "learning_rate": 4.821115520337557e-05, "loss": 3.5115, "step": 114000 }, { "epoch": 0.72, "learning_rate": 4.8203306501236954e-05, "loss": 3.5144, "step": 114500 }, { "epoch": 0.72, "learning_rate": 4.819545779909834e-05, "loss": 3.5092, "step": 115000 }, { "epoch": 0.73, "learning_rate": 4.818760909695973e-05, "loss": 3.5079, "step": 115500 }, { "epoch": 0.73, "learning_rate": 4.817976039482112e-05, "loss": 3.5159, "step": 116000 }, { "epoch": 0.73, "learning_rate": 4.81719116926825e-05, "loss": 3.507, "step": 116500 }, { "epoch": 0.73, "learning_rate": 4.816406299054389e-05, "loss": 3.5052, "step": 117000 }, { "epoch": 0.74, "learning_rate": 4.815622998580955e-05, "loss": 3.4984, "step": 117500 }, { "epoch": 0.74, "learning_rate": 4.814838128367094e-05, "loss": 3.5103, "step": 118000 }, { "epoch": 0.74, "learning_rate": 4.814053258153232e-05, "loss": 3.5059, "step": 118500 }, { "epoch": 0.75, "learning_rate": 4.8132683879393706e-05, "loss": 3.5012, "step": 119000 }, { "epoch": 0.75, "learning_rate": 4.812483517725509e-05, "loss": 3.4964, "step": 119500 }, { "epoch": 0.75, "learning_rate": 4.8116986475116476e-05, "loss": 3.4913, "step": 120000 }, { "epoch": 0.75, "eval_accuracy": 0.39986461666278406, "eval_loss": 3.405158042907715, "eval_runtime": 1449.7437, "eval_samples_per_second": 57.804, "eval_steps_per_second": 5.781, "step": 120000 }, { "epoch": 0.76, "learning_rate": 4.8109137772977864e-05, "loss": 3.5027, "step": 120500 }, { "epoch": 0.76, "learning_rate": 4.810128907083925e-05, "loss": 3.4945, "step": 121000 }, { "epoch": 0.76, "learning_rate": 4.8093440368700634e-05, "loss": 3.4876, "step": 121500 }, { "epoch": 0.77, "learning_rate": 4.808559166656202e-05, "loss": 3.4984, "step": 122000 }, { "epoch": 0.77, "learning_rate": 4.80777429644234e-05, "loss": 3.4946, "step": 122500 }, { "epoch": 0.77, "learning_rate": 4.806990995968907e-05, "loss": 3.4997, "step": 123000 }, { "epoch": 0.78, "learning_rate": 4.806206125755045e-05, "loss": 3.4849, "step": 123500 }, { "epoch": 0.78, "learning_rate": 4.8054212555411834e-05, "loss": 3.4829, "step": 124000 }, { "epoch": 0.78, "learning_rate": 4.804636385327322e-05, "loss": 3.4883, "step": 124500 }, { "epoch": 0.78, "learning_rate": 4.803851515113461e-05, "loss": 3.4802, "step": 125000 }, { "epoch": 0.79, "learning_rate": 4.803068214640027e-05, "loss": 3.4968, "step": 125500 }, { "epoch": 0.79, "learning_rate": 4.802283344426166e-05, "loss": 3.4826, "step": 126000 }, { "epoch": 0.79, "learning_rate": 4.801498474212304e-05, "loss": 3.4914, "step": 126500 }, { "epoch": 0.8, "learning_rate": 4.800713603998443e-05, "loss": 3.4796, "step": 127000 }, { "epoch": 0.8, "learning_rate": 4.799928733784582e-05, "loss": 3.4754, "step": 127500 }, { "epoch": 0.8, "learning_rate": 4.7991438635707205e-05, "loss": 3.4842, "step": 128000 }, { "epoch": 0.81, "learning_rate": 4.798360563097287e-05, "loss": 3.476, "step": 128500 }, { "epoch": 0.81, "learning_rate": 4.797575692883425e-05, "loss": 3.4819, "step": 129000 }, { "epoch": 0.81, "learning_rate": 4.7967908226695636e-05, "loss": 3.482, "step": 129500 }, { "epoch": 0.82, "learning_rate": 4.7960059524557024e-05, "loss": 3.4801, "step": 130000 }, { "epoch": 0.82, "learning_rate": 4.7952210822418406e-05, "loss": 3.4748, "step": 130500 }, { "epoch": 0.82, "learning_rate": 4.7944377817684074e-05, "loss": 3.4732, "step": 131000 }, { "epoch": 0.83, "learning_rate": 4.7936544812949735e-05, "loss": 3.4677, "step": 131500 }, { "epoch": 0.83, "learning_rate": 4.7928696110811117e-05, "loss": 3.47, "step": 132000 }, { "epoch": 0.83, "learning_rate": 4.7920847408672505e-05, "loss": 3.4707, "step": 132500 }, { "epoch": 0.84, "learning_rate": 4.791299870653389e-05, "loss": 3.4728, "step": 133000 }, { "epoch": 0.84, "learning_rate": 4.7905165701799554e-05, "loss": 3.4722, "step": 133500 }, { "epoch": 0.84, "learning_rate": 4.789731699966094e-05, "loss": 3.4654, "step": 134000 }, { "epoch": 0.84, "learning_rate": 4.7889468297522324e-05, "loss": 3.4636, "step": 134500 }, { "epoch": 0.85, "learning_rate": 4.788161959538371e-05, "loss": 3.469, "step": 135000 }, { "epoch": 0.85, "learning_rate": 4.787377089324509e-05, "loss": 3.4625, "step": 135500 }, { "epoch": 0.85, "learning_rate": 4.786592219110648e-05, "loss": 3.4671, "step": 136000 }, { "epoch": 0.86, "learning_rate": 4.785807348896786e-05, "loss": 3.4627, "step": 136500 }, { "epoch": 0.86, "learning_rate": 4.785022478682925e-05, "loss": 3.4604, "step": 137000 }, { "epoch": 0.86, "learning_rate": 4.784237608469064e-05, "loss": 3.4643, "step": 137500 }, { "epoch": 0.87, "learning_rate": 4.783452738255203e-05, "loss": 3.4604, "step": 138000 }, { "epoch": 0.87, "learning_rate": 4.782667868041341e-05, "loss": 3.4596, "step": 138500 }, { "epoch": 0.87, "learning_rate": 4.781884567567907e-05, "loss": 3.4595, "step": 139000 }, { "epoch": 0.88, "learning_rate": 4.781099697354046e-05, "loss": 3.4529, "step": 139500 }, { "epoch": 0.88, "learning_rate": 4.7803148271401846e-05, "loss": 3.4569, "step": 140000 }, { "epoch": 0.88, "learning_rate": 4.779529956926323e-05, "loss": 3.4538, "step": 140500 }, { "epoch": 0.89, "learning_rate": 4.7787450867124616e-05, "loss": 3.4569, "step": 141000 }, { "epoch": 0.89, "learning_rate": 4.7779602164986e-05, "loss": 3.4599, "step": 141500 }, { "epoch": 0.89, "learning_rate": 4.7771753462847385e-05, "loss": 3.4523, "step": 142000 }, { "epoch": 0.89, "learning_rate": 4.7763936155517326e-05, "loss": 3.4519, "step": 142500 }, { "epoch": 0.9, "learning_rate": 4.7756087453378715e-05, "loss": 3.4495, "step": 143000 }, { "epoch": 0.9, "learning_rate": 4.7748238751240096e-05, "loss": 3.4535, "step": 143500 }, { "epoch": 0.9, "learning_rate": 4.7740390049101484e-05, "loss": 3.4478, "step": 144000 }, { "epoch": 0.91, "learning_rate": 4.7732557044367145e-05, "loss": 3.4531, "step": 144500 }, { "epoch": 0.91, "learning_rate": 4.7724708342228534e-05, "loss": 3.4516, "step": 145000 }, { "epoch": 0.91, "learning_rate": 4.7716859640089915e-05, "loss": 3.4474, "step": 145500 }, { "epoch": 0.92, "learning_rate": 4.77090109379513e-05, "loss": 3.4483, "step": 146000 }, { "epoch": 0.92, "learning_rate": 4.770116223581269e-05, "loss": 3.4523, "step": 146500 }, { "epoch": 0.92, "learning_rate": 4.769331353367407e-05, "loss": 3.4523, "step": 147000 }, { "epoch": 0.93, "learning_rate": 4.768546483153546e-05, "loss": 3.4469, "step": 147500 }, { "epoch": 0.93, "learning_rate": 4.767761612939685e-05, "loss": 3.4426, "step": 148000 }, { "epoch": 0.93, "learning_rate": 4.766978312466251e-05, "loss": 3.4404, "step": 148500 }, { "epoch": 0.94, "learning_rate": 4.766195011992817e-05, "loss": 3.4383, "step": 149000 }, { "epoch": 0.94, "learning_rate": 4.765410141778955e-05, "loss": 3.439, "step": 149500 }, { "epoch": 0.94, "learning_rate": 4.764625271565094e-05, "loss": 3.4447, "step": 150000 }, { "epoch": 0.94, "eval_accuracy": 0.40664760624373086, "eval_loss": 3.3443257808685303, "eval_runtime": 1448.9394, "eval_samples_per_second": 57.836, "eval_steps_per_second": 5.784, "step": 150000 }, { "epoch": 0.94, "learning_rate": 4.763840401351233e-05, "loss": 3.4358, "step": 150500 }, { "epoch": 0.95, "learning_rate": 4.763055531137372e-05, "loss": 3.4423, "step": 151000 }, { "epoch": 0.95, "learning_rate": 4.76227066092351e-05, "loss": 3.4301, "step": 151500 }, { "epoch": 0.95, "learning_rate": 4.761485790709648e-05, "loss": 3.4484, "step": 152000 }, { "epoch": 0.96, "learning_rate": 4.760700920495787e-05, "loss": 3.4399, "step": 152500 }, { "epoch": 0.96, "learning_rate": 4.7599160502819256e-05, "loss": 3.4444, "step": 153000 }, { "epoch": 0.96, "learning_rate": 4.7591311800680644e-05, "loss": 3.43, "step": 153500 }, { "epoch": 0.97, "learning_rate": 4.7583463098542026e-05, "loss": 3.4344, "step": 154000 }, { "epoch": 0.97, "learning_rate": 4.7575614396403414e-05, "loss": 3.4308, "step": 154500 }, { "epoch": 0.97, "learning_rate": 4.7567781391669075e-05, "loss": 3.4331, "step": 155000 }, { "epoch": 0.98, "learning_rate": 4.755993268953046e-05, "loss": 3.4364, "step": 155500 }, { "epoch": 0.98, "learning_rate": 4.755208398739185e-05, "loss": 3.4288, "step": 156000 }, { "epoch": 0.98, "learning_rate": 4.754423528525323e-05, "loss": 3.4342, "step": 156500 }, { "epoch": 0.99, "learning_rate": 4.7536386583114614e-05, "loss": 3.4347, "step": 157000 }, { "epoch": 0.99, "learning_rate": 4.7528537880976e-05, "loss": 3.4315, "step": 157500 }, { "epoch": 0.99, "learning_rate": 4.752068917883739e-05, "loss": 3.4217, "step": 158000 }, { "epoch": 1.0, "learning_rate": 4.751284047669878e-05, "loss": 3.4268, "step": 158500 }, { "epoch": 1.0, "learning_rate": 4.750499177456016e-05, "loss": 3.4262, "step": 159000 }, { "epoch": 1.0, "learning_rate": 4.749715876982582e-05, "loss": 3.4167, "step": 159500 }, { "epoch": 1.0, "learning_rate": 4.748931006768721e-05, "loss": 3.4043, "step": 160000 }, { "epoch": 1.01, "learning_rate": 4.74814613655486e-05, "loss": 3.4068, "step": 160500 }, { "epoch": 1.01, "learning_rate": 4.7473612663409986e-05, "loss": 3.4098, "step": 161000 }, { "epoch": 1.01, "learning_rate": 4.746577965867564e-05, "loss": 3.409, "step": 161500 }, { "epoch": 1.02, "learning_rate": 4.745793095653703e-05, "loss": 3.4083, "step": 162000 }, { "epoch": 1.02, "learning_rate": 4.7450082254398416e-05, "loss": 3.4054, "step": 162500 }, { "epoch": 1.02, "learning_rate": 4.7442233552259805e-05, "loss": 3.404, "step": 163000 }, { "epoch": 1.03, "learning_rate": 4.7434384850121186e-05, "loss": 3.405, "step": 163500 }, { "epoch": 1.03, "learning_rate": 4.7426551845386854e-05, "loss": 3.3994, "step": 164000 }, { "epoch": 1.03, "learning_rate": 4.7418703143248235e-05, "loss": 3.4006, "step": 164500 }, { "epoch": 1.04, "learning_rate": 4.74108701385139e-05, "loss": 3.4054, "step": 165000 }, { "epoch": 1.04, "learning_rate": 4.7403021436375285e-05, "loss": 3.4015, "step": 165500 }, { "epoch": 1.04, "learning_rate": 4.739517273423667e-05, "loss": 3.4024, "step": 166000 }, { "epoch": 1.05, "learning_rate": 4.7387324032098054e-05, "loss": 3.4019, "step": 166500 }, { "epoch": 1.05, "learning_rate": 4.7379475329959436e-05, "loss": 3.4013, "step": 167000 }, { "epoch": 1.05, "learning_rate": 4.7371642325225104e-05, "loss": 3.3973, "step": 167500 }, { "epoch": 1.05, "learning_rate": 4.736379362308649e-05, "loss": 3.3948, "step": 168000 }, { "epoch": 1.06, "learning_rate": 4.7355944920947873e-05, "loss": 3.3991, "step": 168500 }, { "epoch": 1.06, "learning_rate": 4.734809621880926e-05, "loss": 3.4018, "step": 169000 }, { "epoch": 1.06, "learning_rate": 4.734024751667064e-05, "loss": 3.401, "step": 169500 }, { "epoch": 1.07, "learning_rate": 4.733239881453203e-05, "loss": 3.3942, "step": 170000 }, { "epoch": 1.07, "learning_rate": 4.732455011239342e-05, "loss": 3.4046, "step": 170500 }, { "epoch": 1.07, "learning_rate": 4.731670141025481e-05, "loss": 3.3944, "step": 171000 }, { "epoch": 1.08, "learning_rate": 4.730885270811619e-05, "loss": 3.3933, "step": 171500 }, { "epoch": 1.08, "learning_rate": 4.730101970338185e-05, "loss": 3.3932, "step": 172000 }, { "epoch": 1.08, "learning_rate": 4.729317100124324e-05, "loss": 3.3964, "step": 172500 }, { "epoch": 1.09, "learning_rate": 4.7285322299104626e-05, "loss": 3.3896, "step": 173000 }, { "epoch": 1.09, "learning_rate": 4.727747359696601e-05, "loss": 3.3997, "step": 173500 }, { "epoch": 1.09, "learning_rate": 4.7269640592231676e-05, "loss": 3.395, "step": 174000 }, { "epoch": 1.1, "learning_rate": 4.726179189009306e-05, "loss": 3.3983, "step": 174500 }, { "epoch": 1.1, "learning_rate": 4.725394318795444e-05, "loss": 3.3936, "step": 175000 }, { "epoch": 1.1, "learning_rate": 4.724611018322011e-05, "loss": 3.3893, "step": 175500 }, { "epoch": 1.11, "learning_rate": 4.7238261481081495e-05, "loss": 3.3894, "step": 176000 }, { "epoch": 1.11, "learning_rate": 4.7230412778942876e-05, "loss": 3.3946, "step": 176500 }, { "epoch": 1.11, "learning_rate": 4.722256407680426e-05, "loss": 3.3872, "step": 177000 }, { "epoch": 1.11, "learning_rate": 4.7214715374665646e-05, "loss": 3.3851, "step": 177500 }, { "epoch": 1.12, "learning_rate": 4.7206866672527034e-05, "loss": 3.3929, "step": 178000 }, { "epoch": 1.12, "learning_rate": 4.719901797038842e-05, "loss": 3.3875, "step": 178500 }, { "epoch": 1.12, "learning_rate": 4.71911692682498e-05, "loss": 3.3924, "step": 179000 }, { "epoch": 1.13, "learning_rate": 4.718332056611119e-05, "loss": 3.3911, "step": 179500 }, { "epoch": 1.13, "learning_rate": 4.717548756137685e-05, "loss": 3.3866, "step": 180000 }, { "epoch": 1.13, "eval_accuracy": 0.4116377365299254, "eval_loss": 3.3021390438079834, "eval_runtime": 1451.7429, "eval_samples_per_second": 57.724, "eval_steps_per_second": 5.773, "step": 180000 }, { "epoch": 1.13, "learning_rate": 4.716763885923824e-05, "loss": 3.3861, "step": 180500 }, { "epoch": 1.14, "learning_rate": 4.715979015709963e-05, "loss": 3.3783, "step": 181000 }, { "epoch": 1.14, "learning_rate": 4.715194145496101e-05, "loss": 3.3797, "step": 181500 }, { "epoch": 1.14, "learning_rate": 4.714409275282239e-05, "loss": 3.3941, "step": 182000 }, { "epoch": 1.15, "learning_rate": 4.713625974808806e-05, "loss": 3.3796, "step": 182500 }, { "epoch": 1.15, "learning_rate": 4.712841104594944e-05, "loss": 3.3875, "step": 183000 }, { "epoch": 1.15, "learning_rate": 4.712056234381083e-05, "loss": 3.3821, "step": 183500 }, { "epoch": 1.16, "learning_rate": 4.711271364167221e-05, "loss": 3.3756, "step": 184000 }, { "epoch": 1.16, "learning_rate": 4.71048649395336e-05, "loss": 3.3767, "step": 184500 }, { "epoch": 1.16, "learning_rate": 4.709701623739499e-05, "loss": 3.3874, "step": 185000 }, { "epoch": 1.16, "learning_rate": 4.7089167535256375e-05, "loss": 3.3771, "step": 185500 }, { "epoch": 1.17, "learning_rate": 4.708131883311776e-05, "loss": 3.3847, "step": 186000 }, { "epoch": 1.17, "learning_rate": 4.70735015257877e-05, "loss": 3.3812, "step": 186500 }, { "epoch": 1.17, "learning_rate": 4.706565282364908e-05, "loss": 3.3816, "step": 187000 }, { "epoch": 1.18, "learning_rate": 4.705780412151047e-05, "loss": 3.3761, "step": 187500 }, { "epoch": 1.18, "learning_rate": 4.7049955419371855e-05, "loss": 3.3791, "step": 188000 }, { "epoch": 1.18, "learning_rate": 4.7042106717233244e-05, "loss": 3.3755, "step": 188500 }, { "epoch": 1.19, "learning_rate": 4.703425801509463e-05, "loss": 3.3765, "step": 189000 }, { "epoch": 1.19, "learning_rate": 4.7026425010360286e-05, "loss": 3.3707, "step": 189500 }, { "epoch": 1.19, "learning_rate": 4.7018576308221674e-05, "loss": 3.372, "step": 190000 }, { "epoch": 1.2, "learning_rate": 4.701072760608306e-05, "loss": 3.3738, "step": 190500 }, { "epoch": 1.2, "learning_rate": 4.700287890394445e-05, "loss": 3.3773, "step": 191000 }, { "epoch": 1.2, "learning_rate": 4.699503020180583e-05, "loss": 3.375, "step": 191500 }, { "epoch": 1.21, "learning_rate": 4.698718149966721e-05, "loss": 3.3822, "step": 192000 }, { "epoch": 1.21, "learning_rate": 4.69793327975286e-05, "loss": 3.3734, "step": 192500 }, { "epoch": 1.21, "learning_rate": 4.697149979279426e-05, "loss": 3.3662, "step": 193000 }, { "epoch": 1.21, "learning_rate": 4.696365109065565e-05, "loss": 3.3735, "step": 193500 }, { "epoch": 1.22, "learning_rate": 4.695581808592132e-05, "loss": 3.3705, "step": 194000 }, { "epoch": 1.22, "learning_rate": 4.69479693837827e-05, "loss": 3.3693, "step": 194500 }, { "epoch": 1.22, "learning_rate": 4.694012068164408e-05, "loss": 3.376, "step": 195000 }, { "epoch": 1.23, "learning_rate": 4.693227197950547e-05, "loss": 3.3704, "step": 195500 }, { "epoch": 1.23, "learning_rate": 4.692442327736686e-05, "loss": 3.3808, "step": 196000 }, { "epoch": 1.23, "learning_rate": 4.6916574575228246e-05, "loss": 3.3614, "step": 196500 }, { "epoch": 1.24, "learning_rate": 4.690872587308963e-05, "loss": 3.3641, "step": 197000 }, { "epoch": 1.24, "learning_rate": 4.6900877170951016e-05, "loss": 3.3669, "step": 197500 }, { "epoch": 1.24, "learning_rate": 4.689304416621668e-05, "loss": 3.3737, "step": 198000 }, { "epoch": 1.25, "learning_rate": 4.688521116148234e-05, "loss": 3.3707, "step": 198500 }, { "epoch": 1.25, "learning_rate": 4.6877362459343727e-05, "loss": 3.369, "step": 199000 }, { "epoch": 1.25, "learning_rate": 4.6869513757205115e-05, "loss": 3.3714, "step": 199500 }, { "epoch": 1.26, "learning_rate": 4.6861665055066496e-05, "loss": 3.3636, "step": 200000 }, { "epoch": 1.26, "learning_rate": 4.685383205033216e-05, "loss": 3.3647, "step": 200500 }, { "epoch": 1.26, "learning_rate": 4.6845983348193546e-05, "loss": 3.3669, "step": 201000 }, { "epoch": 1.27, "learning_rate": 4.6838134646054934e-05, "loss": 3.3644, "step": 201500 }, { "epoch": 1.27, "learning_rate": 4.683028594391632e-05, "loss": 3.3657, "step": 202000 }, { "epoch": 1.27, "learning_rate": 4.68224372417777e-05, "loss": 3.3602, "step": 202500 }, { "epoch": 1.27, "learning_rate": 4.6814588539639085e-05, "loss": 3.3565, "step": 203000 }, { "epoch": 1.28, "learning_rate": 4.680673983750047e-05, "loss": 3.3647, "step": 203500 }, { "epoch": 1.28, "learning_rate": 4.679889113536186e-05, "loss": 3.3609, "step": 204000 }, { "epoch": 1.28, "learning_rate": 4.679104243322324e-05, "loss": 3.3593, "step": 204500 }, { "epoch": 1.29, "learning_rate": 4.6783209428488904e-05, "loss": 3.3635, "step": 205000 }, { "epoch": 1.29, "learning_rate": 4.677536072635029e-05, "loss": 3.3578, "step": 205500 }, { "epoch": 1.29, "learning_rate": 4.676751202421168e-05, "loss": 3.3625, "step": 206000 }, { "epoch": 1.3, "learning_rate": 4.675966332207307e-05, "loss": 3.3543, "step": 206500 }, { "epoch": 1.3, "learning_rate": 4.675181461993445e-05, "loss": 3.363, "step": 207000 }, { "epoch": 1.3, "learning_rate": 4.674396591779584e-05, "loss": 3.3657, "step": 207500 }, { "epoch": 1.31, "learning_rate": 4.673611721565722e-05, "loss": 3.351, "step": 208000 }, { "epoch": 1.31, "learning_rate": 4.672826851351861e-05, "loss": 3.3513, "step": 208500 }, { "epoch": 1.31, "learning_rate": 4.672043550878427e-05, "loss": 3.3547, "step": 209000 }, { "epoch": 1.32, "learning_rate": 4.671258680664565e-05, "loss": 3.356, "step": 209500 }, { "epoch": 1.32, "learning_rate": 4.670473810450704e-05, "loss": 3.3597, "step": 210000 }, { "epoch": 1.32, "eval_accuracy": 0.4153654267033467, "eval_loss": 3.267679452896118, "eval_runtime": 1450.6628, "eval_samples_per_second": 57.767, "eval_steps_per_second": 5.777, "step": 210000 }, { "epoch": 1.32, "learning_rate": 4.6696889402368426e-05, "loss": 3.3484, "step": 210500 }, { "epoch": 1.32, "learning_rate": 4.668905639763409e-05, "loss": 3.3593, "step": 211000 }, { "epoch": 1.33, "learning_rate": 4.6681223392899755e-05, "loss": 3.3575, "step": 211500 }, { "epoch": 1.33, "learning_rate": 4.6673374690761143e-05, "loss": 3.3513, "step": 212000 }, { "epoch": 1.33, "learning_rate": 4.6665525988622525e-05, "loss": 3.3527, "step": 212500 }, { "epoch": 1.34, "learning_rate": 4.6657677286483906e-05, "loss": 3.3522, "step": 213000 }, { "epoch": 1.34, "learning_rate": 4.6649844281749574e-05, "loss": 3.3476, "step": 213500 }, { "epoch": 1.34, "learning_rate": 4.6641995579610956e-05, "loss": 3.3528, "step": 214000 }, { "epoch": 1.35, "learning_rate": 4.6634146877472344e-05, "loss": 3.3535, "step": 214500 }, { "epoch": 1.35, "learning_rate": 4.6626298175333725e-05, "loss": 3.3408, "step": 215000 }, { "epoch": 1.35, "learning_rate": 4.661844947319511e-05, "loss": 3.3494, "step": 215500 }, { "epoch": 1.36, "learning_rate": 4.66106007710565e-05, "loss": 3.3467, "step": 216000 }, { "epoch": 1.36, "learning_rate": 4.660275206891789e-05, "loss": 3.3443, "step": 216500 }, { "epoch": 1.36, "learning_rate": 4.659490336677927e-05, "loss": 3.3483, "step": 217000 }, { "epoch": 1.37, "learning_rate": 4.658705466464066e-05, "loss": 3.3462, "step": 217500 }, { "epoch": 1.37, "learning_rate": 4.657920596250204e-05, "loss": 3.347, "step": 218000 }, { "epoch": 1.37, "learning_rate": 4.657135726036343e-05, "loss": 3.3498, "step": 218500 }, { "epoch": 1.38, "learning_rate": 4.656350855822482e-05, "loss": 3.3562, "step": 219000 }, { "epoch": 1.38, "learning_rate": 4.655567555349047e-05, "loss": 3.3477, "step": 219500 }, { "epoch": 1.38, "learning_rate": 4.654782685135186e-05, "loss": 3.3467, "step": 220000 }, { "epoch": 1.38, "learning_rate": 4.653997814921325e-05, "loss": 3.344, "step": 220500 }, { "epoch": 1.39, "learning_rate": 4.6532129447074636e-05, "loss": 3.3388, "step": 221000 }, { "epoch": 1.39, "learning_rate": 4.65242964423403e-05, "loss": 3.3456, "step": 221500 }, { "epoch": 1.39, "learning_rate": 4.6516447740201685e-05, "loss": 3.342, "step": 222000 }, { "epoch": 1.4, "learning_rate": 4.6508599038063067e-05, "loss": 3.3464, "step": 222500 }, { "epoch": 1.4, "learning_rate": 4.650076603332873e-05, "loss": 3.3419, "step": 223000 }, { "epoch": 1.4, "learning_rate": 4.6492917331190116e-05, "loss": 3.3492, "step": 223500 }, { "epoch": 1.41, "learning_rate": 4.6485068629051504e-05, "loss": 3.3477, "step": 224000 }, { "epoch": 1.41, "learning_rate": 4.647721992691289e-05, "loss": 3.3453, "step": 224500 }, { "epoch": 1.41, "learning_rate": 4.6469371224774274e-05, "loss": 3.3368, "step": 225000 }, { "epoch": 1.42, "learning_rate": 4.646152252263566e-05, "loss": 3.3435, "step": 225500 }, { "epoch": 1.42, "learning_rate": 4.645368951790132e-05, "loss": 3.3453, "step": 226000 }, { "epoch": 1.42, "learning_rate": 4.644584081576271e-05, "loss": 3.342, "step": 226500 }, { "epoch": 1.43, "learning_rate": 4.643799211362409e-05, "loss": 3.3373, "step": 227000 }, { "epoch": 1.43, "learning_rate": 4.6430143411485474e-05, "loss": 3.3392, "step": 227500 }, { "epoch": 1.43, "learning_rate": 4.642229470934686e-05, "loss": 3.3424, "step": 228000 }, { "epoch": 1.43, "learning_rate": 4.641444600720825e-05, "loss": 3.3424, "step": 228500 }, { "epoch": 1.44, "learning_rate": 4.640659730506964e-05, "loss": 3.3388, "step": 229000 }, { "epoch": 1.44, "learning_rate": 4.639874860293102e-05, "loss": 3.3355, "step": 229500 }, { "epoch": 1.44, "learning_rate": 4.639089990079241e-05, "loss": 3.3356, "step": 230000 }, { "epoch": 1.45, "learning_rate": 4.638306689605807e-05, "loss": 3.3379, "step": 230500 }, { "epoch": 1.45, "learning_rate": 4.637521819391946e-05, "loss": 3.3382, "step": 231000 }, { "epoch": 1.45, "learning_rate": 4.6367369491780845e-05, "loss": 3.3353, "step": 231500 }, { "epoch": 1.46, "learning_rate": 4.635952078964223e-05, "loss": 3.3337, "step": 232000 }, { "epoch": 1.46, "learning_rate": 4.635168778490789e-05, "loss": 3.3343, "step": 232500 }, { "epoch": 1.46, "learning_rate": 4.6343839082769276e-05, "loss": 3.3362, "step": 233000 }, { "epoch": 1.47, "learning_rate": 4.6335990380630664e-05, "loss": 3.3301, "step": 233500 }, { "epoch": 1.47, "learning_rate": 4.6328141678492046e-05, "loss": 3.3411, "step": 234000 }, { "epoch": 1.47, "learning_rate": 4.632029297635343e-05, "loss": 3.334, "step": 234500 }, { "epoch": 1.48, "learning_rate": 4.6312459971619095e-05, "loss": 3.3379, "step": 235000 }, { "epoch": 1.48, "learning_rate": 4.6304611269480483e-05, "loss": 3.33, "step": 235500 }, { "epoch": 1.48, "learning_rate": 4.6296762567341865e-05, "loss": 3.3259, "step": 236000 }, { "epoch": 1.48, "learning_rate": 4.628891386520325e-05, "loss": 3.3404, "step": 236500 }, { "epoch": 1.49, "learning_rate": 4.6281080860468914e-05, "loss": 3.3305, "step": 237000 }, { "epoch": 1.49, "learning_rate": 4.6273232158330296e-05, "loss": 3.3353, "step": 237500 }, { "epoch": 1.49, "learning_rate": 4.6265383456191684e-05, "loss": 3.3332, "step": 238000 }, { "epoch": 1.5, "learning_rate": 4.625753475405307e-05, "loss": 3.3264, "step": 238500 }, { "epoch": 1.5, "learning_rate": 4.624970174931873e-05, "loss": 3.3299, "step": 239000 }, { "epoch": 1.5, "learning_rate": 4.62418687445844e-05, "loss": 3.3233, "step": 239500 }, { "epoch": 1.51, "learning_rate": 4.623402004244578e-05, "loss": 3.3377, "step": 240000 }, { "epoch": 1.51, "eval_accuracy": 0.41876816047345233, "eval_loss": 3.2403032779693604, "eval_runtime": 1449.942, "eval_samples_per_second": 57.796, "eval_steps_per_second": 5.78, "step": 240000 }, { "epoch": 1.51, "learning_rate": 4.6226171340307164e-05, "loss": 3.329, "step": 240500 }, { "epoch": 1.51, "learning_rate": 4.621832263816855e-05, "loss": 3.3262, "step": 241000 }, { "epoch": 1.52, "learning_rate": 4.621047393602994e-05, "loss": 3.3287, "step": 241500 }, { "epoch": 1.52, "learning_rate": 4.620262523389133e-05, "loss": 3.3286, "step": 242000 }, { "epoch": 1.52, "learning_rate": 4.619477653175271e-05, "loss": 3.3283, "step": 242500 }, { "epoch": 1.53, "learning_rate": 4.61869278296141e-05, "loss": 3.3293, "step": 243000 }, { "epoch": 1.53, "learning_rate": 4.6179079127475486e-05, "loss": 3.3231, "step": 243500 }, { "epoch": 1.53, "learning_rate": 4.617123042533687e-05, "loss": 3.3342, "step": 244000 }, { "epoch": 1.54, "learning_rate": 4.6163381723198256e-05, "loss": 3.3204, "step": 244500 }, { "epoch": 1.54, "learning_rate": 4.615553302105964e-05, "loss": 3.318, "step": 245000 }, { "epoch": 1.54, "learning_rate": 4.61477000163253e-05, "loss": 3.3227, "step": 245500 }, { "epoch": 1.54, "learning_rate": 4.6139851314186686e-05, "loss": 3.3234, "step": 246000 }, { "epoch": 1.55, "learning_rate": 4.6132002612048075e-05, "loss": 3.323, "step": 246500 }, { "epoch": 1.55, "learning_rate": 4.612415390990946e-05, "loss": 3.3304, "step": 247000 }, { "epoch": 1.55, "learning_rate": 4.611632090517512e-05, "loss": 3.3244, "step": 247500 }, { "epoch": 1.56, "learning_rate": 4.6108472203036505e-05, "loss": 3.3231, "step": 248000 }, { "epoch": 1.56, "learning_rate": 4.6100623500897894e-05, "loss": 3.3277, "step": 248500 }, { "epoch": 1.56, "learning_rate": 4.6092790496163555e-05, "loss": 3.3266, "step": 249000 }, { "epoch": 1.57, "learning_rate": 4.608494179402494e-05, "loss": 3.32, "step": 249500 }, { "epoch": 1.57, "learning_rate": 4.6077093091886324e-05, "loss": 3.316, "step": 250000 }, { "epoch": 1.57, "learning_rate": 4.606924438974771e-05, "loss": 3.3285, "step": 250500 }, { "epoch": 1.58, "learning_rate": 4.60613956876091e-05, "loss": 3.3224, "step": 251000 }, { "epoch": 1.58, "learning_rate": 4.605354698547049e-05, "loss": 3.3236, "step": 251500 }, { "epoch": 1.58, "learning_rate": 4.604569828333187e-05, "loss": 3.3204, "step": 252000 }, { "epoch": 1.59, "learning_rate": 4.603784958119325e-05, "loss": 3.3159, "step": 252500 }, { "epoch": 1.59, "learning_rate": 4.603000087905464e-05, "loss": 3.3188, "step": 253000 }, { "epoch": 1.59, "learning_rate": 4.602215217691603e-05, "loss": 3.3213, "step": 253500 }, { "epoch": 1.59, "learning_rate": 4.601435056699024e-05, "loss": 3.3193, "step": 254000 }, { "epoch": 1.6, "learning_rate": 4.600650186485163e-05, "loss": 3.3205, "step": 254500 }, { "epoch": 1.6, "learning_rate": 4.599865316271302e-05, "loss": 3.3215, "step": 255000 }, { "epoch": 1.6, "learning_rate": 4.59908044605744e-05, "loss": 3.3156, "step": 255500 }, { "epoch": 1.61, "learning_rate": 4.598295575843579e-05, "loss": 3.3261, "step": 256000 }, { "epoch": 1.61, "learning_rate": 4.5975107056297176e-05, "loss": 3.323, "step": 256500 }, { "epoch": 1.61, "learning_rate": 4.596725835415856e-05, "loss": 3.3166, "step": 257000 }, { "epoch": 1.62, "learning_rate": 4.5959409652019946e-05, "loss": 3.3106, "step": 257500 }, { "epoch": 1.62, "learning_rate": 4.595156094988133e-05, "loss": 3.3095, "step": 258000 }, { "epoch": 1.62, "learning_rate": 4.5943712247742715e-05, "loss": 3.3095, "step": 258500 }, { "epoch": 1.63, "learning_rate": 4.59358635456041e-05, "loss": 3.3196, "step": 259000 }, { "epoch": 1.63, "learning_rate": 4.592801484346549e-05, "loss": 3.3197, "step": 259500 }, { "epoch": 1.63, "learning_rate": 4.592016614132687e-05, "loss": 3.3184, "step": 260000 }, { "epoch": 1.64, "learning_rate": 4.591234883399681e-05, "loss": 3.3139, "step": 260500 }, { "epoch": 1.64, "learning_rate": 4.5904500131858196e-05, "loss": 3.3143, "step": 261000 }, { "epoch": 1.64, "learning_rate": 4.5896651429719584e-05, "loss": 3.3169, "step": 261500 }, { "epoch": 1.65, "learning_rate": 4.588880272758097e-05, "loss": 3.3168, "step": 262000 }, { "epoch": 1.65, "learning_rate": 4.588095402544236e-05, "loss": 3.3137, "step": 262500 }, { "epoch": 1.65, "learning_rate": 4.587310532330374e-05, "loss": 3.3133, "step": 263000 }, { "epoch": 1.65, "learning_rate": 4.586525662116512e-05, "loss": 3.3155, "step": 263500 }, { "epoch": 1.66, "learning_rate": 4.585740791902651e-05, "loss": 3.3109, "step": 264000 }, { "epoch": 1.66, "learning_rate": 4.584957491429218e-05, "loss": 3.3039, "step": 264500 }, { "epoch": 1.66, "learning_rate": 4.584172621215356e-05, "loss": 3.309, "step": 265000 }, { "epoch": 1.67, "learning_rate": 4.583387751001494e-05, "loss": 3.309, "step": 265500 }, { "epoch": 1.67, "learning_rate": 4.582602880787633e-05, "loss": 3.309, "step": 266000 }, { "epoch": 1.67, "learning_rate": 4.581818010573772e-05, "loss": 3.3177, "step": 266500 }, { "epoch": 1.68, "learning_rate": 4.5810331403599106e-05, "loss": 3.3098, "step": 267000 }, { "epoch": 1.68, "learning_rate": 4.580248270146049e-05, "loss": 3.3073, "step": 267500 }, { "epoch": 1.68, "learning_rate": 4.579464969672615e-05, "loss": 3.3125, "step": 268000 }, { "epoch": 1.69, "learning_rate": 4.578680099458754e-05, "loss": 3.3065, "step": 268500 }, { "epoch": 1.69, "learning_rate": 4.5778952292448925e-05, "loss": 3.3012, "step": 269000 }, { "epoch": 1.69, "learning_rate": 4.577110359031031e-05, "loss": 3.3071, "step": 269500 }, { "epoch": 1.7, "learning_rate": 4.5763254888171695e-05, "loss": 3.3061, "step": 270000 }, { "epoch": 1.7, "eval_accuracy": 0.42134437723180795, "eval_loss": 3.218022346496582, "eval_runtime": 1446.0819, "eval_samples_per_second": 57.95, "eval_steps_per_second": 5.796, "step": 270000 }, { "epoch": 1.7, "learning_rate": 4.5755406186033076e-05, "loss": 3.3079, "step": 270500 }, { "epoch": 1.7, "learning_rate": 4.5747557483894464e-05, "loss": 3.3138, "step": 271000 }, { "epoch": 1.7, "learning_rate": 4.573970878175585e-05, "loss": 3.3138, "step": 271500 }, { "epoch": 1.71, "learning_rate": 4.5731875777021513e-05, "loss": 3.3071, "step": 272000 }, { "epoch": 1.71, "learning_rate": 4.5724027074882895e-05, "loss": 3.3041, "step": 272500 }, { "epoch": 1.71, "learning_rate": 4.571617837274428e-05, "loss": 3.3033, "step": 273000 }, { "epoch": 1.72, "learning_rate": 4.570832967060567e-05, "loss": 3.3114, "step": 273500 }, { "epoch": 1.72, "learning_rate": 4.570049666587133e-05, "loss": 3.3087, "step": 274000 }, { "epoch": 1.72, "learning_rate": 4.569264796373272e-05, "loss": 3.3043, "step": 274500 }, { "epoch": 1.73, "learning_rate": 4.568481495899838e-05, "loss": 3.3061, "step": 275000 }, { "epoch": 1.73, "learning_rate": 4.567696625685976e-05, "loss": 3.3094, "step": 275500 }, { "epoch": 1.73, "learning_rate": 4.566911755472115e-05, "loss": 3.3041, "step": 276000 }, { "epoch": 1.74, "learning_rate": 4.566126885258254e-05, "loss": 3.3023, "step": 276500 }, { "epoch": 1.74, "learning_rate": 4.565342015044393e-05, "loss": 3.304, "step": 277000 }, { "epoch": 1.74, "learning_rate": 4.5645571448305316e-05, "loss": 3.3011, "step": 277500 }, { "epoch": 1.75, "learning_rate": 4.563775414097525e-05, "loss": 3.3004, "step": 278000 }, { "epoch": 1.75, "learning_rate": 4.562990543883663e-05, "loss": 3.2979, "step": 278500 }, { "epoch": 1.75, "learning_rate": 4.562205673669802e-05, "loss": 3.3059, "step": 279000 }, { "epoch": 1.75, "learning_rate": 4.561420803455941e-05, "loss": 3.3027, "step": 279500 }, { "epoch": 1.76, "learning_rate": 4.5606359332420796e-05, "loss": 3.3021, "step": 280000 }, { "epoch": 1.76, "learning_rate": 4.559852632768646e-05, "loss": 3.3061, "step": 280500 }, { "epoch": 1.76, "learning_rate": 4.559067762554784e-05, "loss": 3.3046, "step": 281000 }, { "epoch": 1.77, "learning_rate": 4.558282892340923e-05, "loss": 3.2993, "step": 281500 }, { "epoch": 1.77, "learning_rate": 4.5574980221270615e-05, "loss": 3.2985, "step": 282000 }, { "epoch": 1.77, "learning_rate": 4.5567147216536277e-05, "loss": 3.2999, "step": 282500 }, { "epoch": 1.78, "learning_rate": 4.5559298514397665e-05, "loss": 3.3009, "step": 283000 }, { "epoch": 1.78, "learning_rate": 4.5551449812259046e-05, "loss": 3.2962, "step": 283500 }, { "epoch": 1.78, "learning_rate": 4.5543601110120434e-05, "loss": 3.3054, "step": 284000 }, { "epoch": 1.79, "learning_rate": 4.5535752407981816e-05, "loss": 3.2936, "step": 284500 }, { "epoch": 1.79, "learning_rate": 4.5527903705843204e-05, "loss": 3.2904, "step": 285000 }, { "epoch": 1.79, "learning_rate": 4.5520055003704585e-05, "loss": 3.3036, "step": 285500 }, { "epoch": 1.8, "learning_rate": 4.551220630156597e-05, "loss": 3.3, "step": 286000 }, { "epoch": 1.8, "learning_rate": 4.550435759942736e-05, "loss": 3.3018, "step": 286500 }, { "epoch": 1.8, "learning_rate": 4.549650889728875e-05, "loss": 3.2984, "step": 287000 }, { "epoch": 1.81, "learning_rate": 4.548866019515014e-05, "loss": 3.3083, "step": 287500 }, { "epoch": 1.81, "learning_rate": 4.548081149301152e-05, "loss": 3.2844, "step": 288000 }, { "epoch": 1.81, "learning_rate": 4.54729627908729e-05, "loss": 3.3009, "step": 288500 }, { "epoch": 1.81, "learning_rate": 4.546516118094712e-05, "loss": 3.3006, "step": 289000 }, { "epoch": 1.82, "learning_rate": 4.54573124788085e-05, "loss": 3.2918, "step": 289500 }, { "epoch": 1.82, "learning_rate": 4.544947947407417e-05, "loss": 3.2985, "step": 290000 }, { "epoch": 1.82, "learning_rate": 4.544163077193556e-05, "loss": 3.2886, "step": 290500 }, { "epoch": 1.83, "learning_rate": 4.543378206979694e-05, "loss": 3.2957, "step": 291000 }, { "epoch": 1.83, "learning_rate": 4.542593336765832e-05, "loss": 3.2969, "step": 291500 }, { "epoch": 1.83, "learning_rate": 4.541808466551971e-05, "loss": 3.2923, "step": 292000 }, { "epoch": 1.84, "learning_rate": 4.54102359633811e-05, "loss": 3.2922, "step": 292500 }, { "epoch": 1.84, "learning_rate": 4.5402387261242486e-05, "loss": 3.2918, "step": 293000 }, { "epoch": 1.84, "learning_rate": 4.539453855910387e-05, "loss": 3.2883, "step": 293500 }, { "epoch": 1.85, "learning_rate": 4.5386689856965256e-05, "loss": 3.2904, "step": 294000 }, { "epoch": 1.85, "learning_rate": 4.537885685223092e-05, "loss": 3.2895, "step": 294500 }, { "epoch": 1.85, "learning_rate": 4.5371008150092305e-05, "loss": 3.2857, "step": 295000 }, { "epoch": 1.86, "learning_rate": 4.5363159447953693e-05, "loss": 3.289, "step": 295500 }, { "epoch": 1.86, "learning_rate": 4.5355310745815075e-05, "loss": 3.2845, "step": 296000 }, { "epoch": 1.86, "learning_rate": 4.5347462043676456e-05, "loss": 3.2979, "step": 296500 }, { "epoch": 1.86, "learning_rate": 4.5339613341537844e-05, "loss": 3.2863, "step": 297000 }, { "epoch": 1.87, "learning_rate": 4.533176463939923e-05, "loss": 3.2883, "step": 297500 }, { "epoch": 1.87, "learning_rate": 4.532391593726062e-05, "loss": 3.2861, "step": 298000 }, { "epoch": 1.87, "learning_rate": 4.5316067235122e-05, "loss": 3.2883, "step": 298500 }, { "epoch": 1.88, "learning_rate": 4.530821853298339e-05, "loss": 3.2872, "step": 299000 }, { "epoch": 1.88, "learning_rate": 4.530036983084477e-05, "loss": 3.2846, "step": 299500 }, { "epoch": 1.88, "learning_rate": 4.529252112870616e-05, "loss": 3.2871, "step": 300000 }, { "epoch": 1.88, "eval_accuracy": 0.42369420466302055, "eval_loss": 3.1973586082458496, "eval_runtime": 1451.2807, "eval_samples_per_second": 57.743, "eval_steps_per_second": 5.775, "step": 300000 }, { "epoch": 1.89, "learning_rate": 4.528468812397183e-05, "loss": 3.2846, "step": 300500 }, { "epoch": 1.89, "learning_rate": 4.527683942183321e-05, "loss": 3.2827, "step": 301000 }, { "epoch": 1.89, "learning_rate": 4.526899071969459e-05, "loss": 3.2858, "step": 301500 }, { "epoch": 1.9, "learning_rate": 4.526115771496026e-05, "loss": 3.2857, "step": 302000 }, { "epoch": 1.9, "learning_rate": 4.525330901282164e-05, "loss": 3.2888, "step": 302500 }, { "epoch": 1.9, "learning_rate": 4.524546031068303e-05, "loss": 3.2861, "step": 303000 }, { "epoch": 1.91, "learning_rate": 4.523761160854441e-05, "loss": 3.2929, "step": 303500 }, { "epoch": 1.91, "learning_rate": 4.52297629064058e-05, "loss": 3.2841, "step": 304000 }, { "epoch": 1.91, "learning_rate": 4.5221914204267186e-05, "loss": 3.2839, "step": 304500 }, { "epoch": 1.92, "learning_rate": 4.521408119953285e-05, "loss": 3.2895, "step": 305000 }, { "epoch": 1.92, "learning_rate": 4.5206232497394235e-05, "loss": 3.2836, "step": 305500 }, { "epoch": 1.92, "learning_rate": 4.5198383795255616e-05, "loss": 3.2771, "step": 306000 }, { "epoch": 1.92, "learning_rate": 4.519055079052128e-05, "loss": 3.2824, "step": 306500 }, { "epoch": 1.93, "learning_rate": 4.5182702088382666e-05, "loss": 3.2882, "step": 307000 }, { "epoch": 1.93, "learning_rate": 4.5174853386244054e-05, "loss": 3.2825, "step": 307500 }, { "epoch": 1.93, "learning_rate": 4.516700468410544e-05, "loss": 3.2798, "step": 308000 }, { "epoch": 1.94, "learning_rate": 4.5159155981966824e-05, "loss": 3.2804, "step": 308500 }, { "epoch": 1.94, "learning_rate": 4.515130727982821e-05, "loss": 3.2832, "step": 309000 }, { "epoch": 1.94, "learning_rate": 4.514345857768959e-05, "loss": 3.2816, "step": 309500 }, { "epoch": 1.95, "learning_rate": 4.513560987555098e-05, "loss": 3.2805, "step": 310000 }, { "epoch": 1.95, "learning_rate": 4.512776117341236e-05, "loss": 3.276, "step": 310500 }, { "epoch": 1.95, "learning_rate": 4.511991247127375e-05, "loss": 3.2799, "step": 311000 }, { "epoch": 1.96, "learning_rate": 4.511206376913514e-05, "loss": 3.2913, "step": 311500 }, { "epoch": 1.96, "learning_rate": 4.510421506699653e-05, "loss": 3.2832, "step": 312000 }, { "epoch": 1.96, "learning_rate": 4.509636636485791e-05, "loss": 3.2806, "step": 312500 }, { "epoch": 1.97, "learning_rate": 4.508851766271929e-05, "loss": 3.2816, "step": 313000 }, { "epoch": 1.97, "learning_rate": 4.508068465798496e-05, "loss": 3.2781, "step": 313500 }, { "epoch": 1.97, "learning_rate": 4.5072835955846346e-05, "loss": 3.2839, "step": 314000 }, { "epoch": 1.97, "learning_rate": 4.506498725370773e-05, "loss": 3.2708, "step": 314500 }, { "epoch": 1.98, "learning_rate": 4.5057138551569115e-05, "loss": 3.2737, "step": 315000 }, { "epoch": 1.98, "learning_rate": 4.50492898494305e-05, "loss": 3.2737, "step": 315500 }, { "epoch": 1.98, "learning_rate": 4.5041441147291885e-05, "loss": 3.2769, "step": 316000 }, { "epoch": 1.99, "learning_rate": 4.503359244515327e-05, "loss": 3.2832, "step": 316500 }, { "epoch": 1.99, "learning_rate": 4.5025759440418934e-05, "loss": 3.2758, "step": 317000 }, { "epoch": 1.99, "learning_rate": 4.501791073828032e-05, "loss": 3.2778, "step": 317500 }, { "epoch": 2.0, "learning_rate": 4.5010062036141704e-05, "loss": 3.2765, "step": 318000 }, { "epoch": 2.0, "learning_rate": 4.500221333400309e-05, "loss": 3.278, "step": 318500 }, { "epoch": 2.0, "learning_rate": 4.499436463186448e-05, "loss": 3.2559, "step": 319000 }, { "epoch": 2.01, "learning_rate": 4.498651592972586e-05, "loss": 3.2559, "step": 319500 }, { "epoch": 2.01, "learning_rate": 4.497866722758724e-05, "loss": 3.256, "step": 320000 }, { "epoch": 2.01, "learning_rate": 4.497081852544863e-05, "loss": 3.2559, "step": 320500 }, { "epoch": 2.02, "learning_rate": 4.496296982331002e-05, "loss": 3.2533, "step": 321000 }, { "epoch": 2.02, "learning_rate": 4.495513681857568e-05, "loss": 3.2611, "step": 321500 }, { "epoch": 2.02, "learning_rate": 4.494728811643707e-05, "loss": 3.2599, "step": 322000 }, { "epoch": 2.02, "learning_rate": 4.493943941429846e-05, "loss": 3.2578, "step": 322500 }, { "epoch": 2.03, "learning_rate": 4.493159071215984e-05, "loss": 3.2548, "step": 323000 }, { "epoch": 2.03, "learning_rate": 4.49237577074255e-05, "loss": 3.2548, "step": 323500 }, { "epoch": 2.03, "learning_rate": 4.491590900528689e-05, "loss": 3.2579, "step": 324000 }, { "epoch": 2.04, "learning_rate": 4.4908060303148276e-05, "loss": 3.256, "step": 324500 }, { "epoch": 2.04, "learning_rate": 4.490022729841394e-05, "loss": 3.2577, "step": 325000 }, { "epoch": 2.04, "learning_rate": 4.489237859627532e-05, "loss": 3.2523, "step": 325500 }, { "epoch": 2.05, "learning_rate": 4.4884529894136707e-05, "loss": 3.2619, "step": 326000 }, { "epoch": 2.05, "learning_rate": 4.4876681191998095e-05, "loss": 3.2576, "step": 326500 }, { "epoch": 2.05, "learning_rate": 4.486883248985948e-05, "loss": 3.2515, "step": 327000 }, { "epoch": 2.06, "learning_rate": 4.4860983787720864e-05, "loss": 3.2618, "step": 327500 }, { "epoch": 2.06, "learning_rate": 4.4853135085582246e-05, "loss": 3.2572, "step": 328000 }, { "epoch": 2.06, "learning_rate": 4.4845286383443634e-05, "loss": 3.2524, "step": 328500 }, { "epoch": 2.07, "learning_rate": 4.483743768130502e-05, "loss": 3.2538, "step": 329000 }, { "epoch": 2.07, "learning_rate": 4.482962037397496e-05, "loss": 3.256, "step": 329500 }, { "epoch": 2.07, "learning_rate": 4.4821787369240625e-05, "loss": 3.2503, "step": 330000 }, { "epoch": 2.07, "eval_accuracy": 0.42597190898985743, "eval_loss": 3.1802444458007812, "eval_runtime": 1449.4054, "eval_samples_per_second": 57.818, "eval_steps_per_second": 5.782, "step": 330000 }, { "epoch": 2.08, "learning_rate": 4.481393866710201e-05, "loss": 3.2495, "step": 330500 }, { "epoch": 2.08, "learning_rate": 4.4806089964963394e-05, "loss": 3.2523, "step": 331000 }, { "epoch": 2.08, "learning_rate": 4.479824126282478e-05, "loss": 3.2548, "step": 331500 }, { "epoch": 2.08, "learning_rate": 4.479039256068617e-05, "loss": 3.2526, "step": 332000 }, { "epoch": 2.09, "learning_rate": 4.478254385854755e-05, "loss": 3.2589, "step": 332500 }, { "epoch": 2.09, "learning_rate": 4.477469515640893e-05, "loss": 3.2595, "step": 333000 }, { "epoch": 2.09, "learning_rate": 4.476684645427032e-05, "loss": 3.2615, "step": 333500 }, { "epoch": 2.1, "learning_rate": 4.475899775213171e-05, "loss": 3.2527, "step": 334000 }, { "epoch": 2.1, "learning_rate": 4.47511490499931e-05, "loss": 3.2529, "step": 334500 }, { "epoch": 2.1, "learning_rate": 4.4743300347854486e-05, "loss": 3.2669, "step": 335000 }, { "epoch": 2.11, "learning_rate": 4.473546734312015e-05, "loss": 3.2567, "step": 335500 }, { "epoch": 2.11, "learning_rate": 4.472761864098153e-05, "loss": 3.2493, "step": 336000 }, { "epoch": 2.11, "learning_rate": 4.4719769938842916e-05, "loss": 3.261, "step": 336500 }, { "epoch": 2.12, "learning_rate": 4.4711921236704304e-05, "loss": 3.2558, "step": 337000 }, { "epoch": 2.12, "learning_rate": 4.4704088231969966e-05, "loss": 3.253, "step": 337500 }, { "epoch": 2.12, "learning_rate": 4.4696239529831354e-05, "loss": 3.2539, "step": 338000 }, { "epoch": 2.13, "learning_rate": 4.4688390827692735e-05, "loss": 3.2516, "step": 338500 }, { "epoch": 2.13, "learning_rate": 4.4680542125554123e-05, "loss": 3.2606, "step": 339000 }, { "epoch": 2.13, "learning_rate": 4.4672693423415505e-05, "loss": 3.2515, "step": 339500 }, { "epoch": 2.13, "learning_rate": 4.466484472127689e-05, "loss": 3.2552, "step": 340000 }, { "epoch": 2.14, "learning_rate": 4.4656996019138274e-05, "loss": 3.2569, "step": 340500 }, { "epoch": 2.14, "learning_rate": 4.464914731699966e-05, "loss": 3.2612, "step": 341000 }, { "epoch": 2.14, "learning_rate": 4.4641314312265324e-05, "loss": 3.2598, "step": 341500 }, { "epoch": 2.15, "learning_rate": 4.463346561012671e-05, "loss": 3.2494, "step": 342000 }, { "epoch": 2.15, "learning_rate": 4.462563260539237e-05, "loss": 3.2585, "step": 342500 }, { "epoch": 2.15, "learning_rate": 4.461779960065804e-05, "loss": 3.2542, "step": 343000 }, { "epoch": 2.16, "learning_rate": 4.460995089851942e-05, "loss": 3.2515, "step": 343500 }, { "epoch": 2.16, "learning_rate": 4.4602117893785084e-05, "loss": 3.2538, "step": 344000 }, { "epoch": 2.16, "learning_rate": 4.459426919164647e-05, "loss": 3.2569, "step": 344500 }, { "epoch": 2.17, "learning_rate": 4.458642048950786e-05, "loss": 3.2557, "step": 345000 }, { "epoch": 2.17, "learning_rate": 4.457857178736924e-05, "loss": 3.2506, "step": 345500 }, { "epoch": 2.17, "learning_rate": 4.457072308523062e-05, "loss": 3.2518, "step": 346000 }, { "epoch": 2.18, "learning_rate": 4.456287438309201e-05, "loss": 3.2516, "step": 346500 }, { "epoch": 2.18, "learning_rate": 4.45550256809534e-05, "loss": 3.2556, "step": 347000 }, { "epoch": 2.18, "learning_rate": 4.454717697881479e-05, "loss": 3.2476, "step": 347500 }, { "epoch": 2.19, "learning_rate": 4.453934397408045e-05, "loss": 3.2529, "step": 348000 }, { "epoch": 2.19, "learning_rate": 4.453149527194184e-05, "loss": 3.2493, "step": 348500 }, { "epoch": 2.19, "learning_rate": 4.452364656980322e-05, "loss": 3.2532, "step": 349000 }, { "epoch": 2.19, "learning_rate": 4.4515797867664607e-05, "loss": 3.249, "step": 349500 }, { "epoch": 2.2, "learning_rate": 4.4507949165525995e-05, "loss": 3.2485, "step": 350000 }, { "epoch": 2.2, "learning_rate": 4.4500100463387376e-05, "loss": 3.2492, "step": 350500 }, { "epoch": 2.2, "learning_rate": 4.449225176124876e-05, "loss": 3.2517, "step": 351000 }, { "epoch": 2.21, "learning_rate": 4.4484403059110145e-05, "loss": 3.2543, "step": 351500 }, { "epoch": 2.21, "learning_rate": 4.4476554356971534e-05, "loss": 3.2559, "step": 352000 }, { "epoch": 2.21, "learning_rate": 4.4468721352237195e-05, "loss": 3.2462, "step": 352500 }, { "epoch": 2.22, "learning_rate": 4.446087265009858e-05, "loss": 3.2564, "step": 353000 }, { "epoch": 2.22, "learning_rate": 4.4453023947959964e-05, "loss": 3.2476, "step": 353500 }, { "epoch": 2.22, "learning_rate": 4.444517524582135e-05, "loss": 3.2551, "step": 354000 }, { "epoch": 2.23, "learning_rate": 4.443732654368274e-05, "loss": 3.236, "step": 354500 }, { "epoch": 2.23, "learning_rate": 4.442947784154413e-05, "loss": 3.2555, "step": 355000 }, { "epoch": 2.23, "learning_rate": 4.442162913940551e-05, "loss": 3.2532, "step": 355500 }, { "epoch": 2.24, "learning_rate": 4.441378043726689e-05, "loss": 3.2479, "step": 356000 }, { "epoch": 2.24, "learning_rate": 4.440596312993683e-05, "loss": 3.2409, "step": 356500 }, { "epoch": 2.24, "learning_rate": 4.439811442779822e-05, "loss": 3.2433, "step": 357000 }, { "epoch": 2.24, "learning_rate": 4.439026572565961e-05, "loss": 3.2476, "step": 357500 }, { "epoch": 2.25, "learning_rate": 4.4382417023521e-05, "loss": 3.2472, "step": 358000 }, { "epoch": 2.25, "learning_rate": 4.437456832138238e-05, "loss": 3.2518, "step": 358500 }, { "epoch": 2.25, "learning_rate": 4.436673531664804e-05, "loss": 3.2522, "step": 359000 }, { "epoch": 2.26, "learning_rate": 4.435888661450943e-05, "loss": 3.2432, "step": 359500 }, { "epoch": 2.26, "learning_rate": 4.4351037912370816e-05, "loss": 3.2463, "step": 360000 }, { "epoch": 2.26, "eval_accuracy": 0.4277281876513697, "eval_loss": 3.1649253368377686, "eval_runtime": 1449.1254, "eval_samples_per_second": 57.829, "eval_steps_per_second": 5.783, "step": 360000 }, { "epoch": 2.26, "learning_rate": 4.43431892102322e-05, "loss": 3.2402, "step": 360500 }, { "epoch": 2.27, "learning_rate": 4.433534050809358e-05, "loss": 3.2475, "step": 361000 }, { "epoch": 2.27, "learning_rate": 4.432749180595497e-05, "loss": 3.2521, "step": 361500 }, { "epoch": 2.27, "learning_rate": 4.4319643103816355e-05, "loss": 3.2469, "step": 362000 }, { "epoch": 2.28, "learning_rate": 4.4311794401677743e-05, "loss": 3.248, "step": 362500 }, { "epoch": 2.28, "learning_rate": 4.4303961396943405e-05, "loss": 3.247, "step": 363000 }, { "epoch": 2.28, "learning_rate": 4.4296128392209066e-05, "loss": 3.2447, "step": 363500 }, { "epoch": 2.29, "learning_rate": 4.428827969007045e-05, "loss": 3.2471, "step": 364000 }, { "epoch": 2.29, "learning_rate": 4.4280430987931836e-05, "loss": 3.2534, "step": 364500 }, { "epoch": 2.29, "learning_rate": 4.4272582285793224e-05, "loss": 3.2506, "step": 365000 }, { "epoch": 2.29, "learning_rate": 4.426473358365461e-05, "loss": 3.2402, "step": 365500 }, { "epoch": 2.3, "learning_rate": 4.425688488151599e-05, "loss": 3.2432, "step": 366000 }, { "epoch": 2.3, "learning_rate": 4.424903617937738e-05, "loss": 3.2446, "step": 366500 }, { "epoch": 2.3, "learning_rate": 4.424118747723876e-05, "loss": 3.2506, "step": 367000 }, { "epoch": 2.31, "learning_rate": 4.423333877510015e-05, "loss": 3.2488, "step": 367500 }, { "epoch": 2.31, "learning_rate": 4.422549007296154e-05, "loss": 3.2447, "step": 368000 }, { "epoch": 2.31, "learning_rate": 4.42176570682272e-05, "loss": 3.2477, "step": 368500 }, { "epoch": 2.32, "learning_rate": 4.420980836608858e-05, "loss": 3.2486, "step": 369000 }, { "epoch": 2.32, "learning_rate": 4.420195966394997e-05, "loss": 3.2498, "step": 369500 }, { "epoch": 2.32, "learning_rate": 4.419411096181136e-05, "loss": 3.2412, "step": 370000 }, { "epoch": 2.33, "learning_rate": 4.4186262259672746e-05, "loss": 3.2507, "step": 370500 }, { "epoch": 2.33, "learning_rate": 4.417842925493841e-05, "loss": 3.2488, "step": 371000 }, { "epoch": 2.33, "learning_rate": 4.417058055279979e-05, "loss": 3.245, "step": 371500 }, { "epoch": 2.34, "learning_rate": 4.416273185066118e-05, "loss": 3.2386, "step": 372000 }, { "epoch": 2.34, "learning_rate": 4.4154883148522565e-05, "loss": 3.2455, "step": 372500 }, { "epoch": 2.34, "learning_rate": 4.414703444638395e-05, "loss": 3.2431, "step": 373000 }, { "epoch": 2.35, "learning_rate": 4.4139185744245335e-05, "loss": 3.2432, "step": 373500 }, { "epoch": 2.35, "learning_rate": 4.4131337042106716e-05, "loss": 3.2427, "step": 374000 }, { "epoch": 2.35, "learning_rate": 4.4123488339968104e-05, "loss": 3.2434, "step": 374500 }, { "epoch": 2.35, "learning_rate": 4.4115655335233765e-05, "loss": 3.2422, "step": 375000 }, { "epoch": 2.36, "learning_rate": 4.4107806633095154e-05, "loss": 3.2371, "step": 375500 }, { "epoch": 2.36, "learning_rate": 4.4099957930956535e-05, "loss": 3.2483, "step": 376000 }, { "epoch": 2.36, "learning_rate": 4.409210922881792e-05, "loss": 3.2464, "step": 376500 }, { "epoch": 2.37, "learning_rate": 4.4084276224083584e-05, "loss": 3.2444, "step": 377000 }, { "epoch": 2.37, "learning_rate": 4.407642752194497e-05, "loss": 3.2422, "step": 377500 }, { "epoch": 2.37, "learning_rate": 4.406857881980636e-05, "loss": 3.2487, "step": 378000 }, { "epoch": 2.38, "learning_rate": 4.406073011766774e-05, "loss": 3.2406, "step": 378500 }, { "epoch": 2.38, "learning_rate": 4.4052897112933403e-05, "loss": 3.2426, "step": 379000 }, { "epoch": 2.38, "learning_rate": 4.404504841079479e-05, "loss": 3.234, "step": 379500 }, { "epoch": 2.39, "learning_rate": 4.403719970865618e-05, "loss": 3.2391, "step": 380000 }, { "epoch": 2.39, "learning_rate": 4.402935100651757e-05, "loss": 3.2329, "step": 380500 }, { "epoch": 2.39, "learning_rate": 4.402150230437895e-05, "loss": 3.236, "step": 381000 }, { "epoch": 2.4, "learning_rate": 4.401365360224034e-05, "loss": 3.2385, "step": 381500 }, { "epoch": 2.4, "learning_rate": 4.400580490010172e-05, "loss": 3.2356, "step": 382000 }, { "epoch": 2.4, "learning_rate": 4.399800329017594e-05, "loss": 3.2399, "step": 382500 }, { "epoch": 2.4, "learning_rate": 4.399015458803732e-05, "loss": 3.2448, "step": 383000 }, { "epoch": 2.41, "learning_rate": 4.398230588589871e-05, "loss": 3.2423, "step": 383500 }, { "epoch": 2.41, "learning_rate": 4.39744571837601e-05, "loss": 3.2436, "step": 384000 }, { "epoch": 2.41, "learning_rate": 4.396660848162148e-05, "loss": 3.2308, "step": 384500 }, { "epoch": 2.42, "learning_rate": 4.395875977948287e-05, "loss": 3.2374, "step": 385000 }, { "epoch": 2.42, "learning_rate": 4.3950911077344255e-05, "loss": 3.2332, "step": 385500 }, { "epoch": 2.42, "learning_rate": 4.394306237520564e-05, "loss": 3.2397, "step": 386000 }, { "epoch": 2.43, "learning_rate": 4.3935213673067025e-05, "loss": 3.2406, "step": 386500 }, { "epoch": 2.43, "learning_rate": 4.3927364970928406e-05, "loss": 3.2316, "step": 387000 }, { "epoch": 2.43, "learning_rate": 4.3919516268789794e-05, "loss": 3.238, "step": 387500 }, { "epoch": 2.44, "learning_rate": 4.391166756665118e-05, "loss": 3.2421, "step": 388000 }, { "epoch": 2.44, "learning_rate": 4.3903834561916844e-05, "loss": 3.2396, "step": 388500 }, { "epoch": 2.44, "learning_rate": 4.3895985859778225e-05, "loss": 3.2375, "step": 389000 }, { "epoch": 2.45, "learning_rate": 4.388813715763961e-05, "loss": 3.2391, "step": 389500 }, { "epoch": 2.45, "learning_rate": 4.3880288455501e-05, "loss": 3.2457, "step": 390000 }, { "epoch": 2.45, "eval_accuracy": 0.42934576085693305, "eval_loss": 3.151153564453125, "eval_runtime": 1448.1875, "eval_samples_per_second": 57.866, "eval_steps_per_second": 5.787, "step": 390000 }, { "epoch": 2.45, "learning_rate": 4.387243975336239e-05, "loss": 3.2395, "step": 390500 }, { "epoch": 2.46, "learning_rate": 4.386459105122377e-05, "loss": 3.2393, "step": 391000 }, { "epoch": 2.46, "learning_rate": 4.385674234908516e-05, "loss": 3.2324, "step": 391500 }, { "epoch": 2.46, "learning_rate": 4.384889364694654e-05, "loss": 3.2351, "step": 392000 }, { "epoch": 2.46, "learning_rate": 4.384104494480793e-05, "loss": 3.2344, "step": 392500 }, { "epoch": 2.47, "learning_rate": 4.383321194007359e-05, "loss": 3.2349, "step": 393000 }, { "epoch": 2.47, "learning_rate": 4.382536323793498e-05, "loss": 3.2281, "step": 393500 }, { "epoch": 2.47, "learning_rate": 4.381751453579636e-05, "loss": 3.2416, "step": 394000 }, { "epoch": 2.48, "learning_rate": 4.380968153106203e-05, "loss": 3.239, "step": 394500 }, { "epoch": 2.48, "learning_rate": 4.380183282892341e-05, "loss": 3.2323, "step": 395000 }, { "epoch": 2.48, "learning_rate": 4.37939841267848e-05, "loss": 3.2315, "step": 395500 }, { "epoch": 2.49, "learning_rate": 4.3786135424646185e-05, "loss": 3.24, "step": 396000 }, { "epoch": 2.49, "learning_rate": 4.3778286722507566e-05, "loss": 3.2343, "step": 396500 }, { "epoch": 2.49, "learning_rate": 4.3770438020368955e-05, "loss": 3.2459, "step": 397000 }, { "epoch": 2.5, "learning_rate": 4.376258931823034e-05, "loss": 3.2265, "step": 397500 }, { "epoch": 2.5, "learning_rate": 4.3754740616091724e-05, "loss": 3.2356, "step": 398000 }, { "epoch": 2.5, "learning_rate": 4.3746891913953105e-05, "loss": 3.2347, "step": 398500 }, { "epoch": 2.51, "learning_rate": 4.3739043211814493e-05, "loss": 3.2326, "step": 399000 }, { "epoch": 2.51, "learning_rate": 4.373119450967588e-05, "loss": 3.2332, "step": 399500 }, { "epoch": 2.51, "learning_rate": 4.372334580753727e-05, "loss": 3.2348, "step": 400000 }, { "epoch": 2.51, "learning_rate": 4.371551280280293e-05, "loss": 3.2389, "step": 400500 }, { "epoch": 2.52, "learning_rate": 4.370766410066431e-05, "loss": 3.2358, "step": 401000 }, { "epoch": 2.52, "learning_rate": 4.36998153985257e-05, "loss": 3.2294, "step": 401500 }, { "epoch": 2.52, "learning_rate": 4.369198239379136e-05, "loss": 3.2312, "step": 402000 }, { "epoch": 2.53, "learning_rate": 4.368413369165275e-05, "loss": 3.2352, "step": 402500 }, { "epoch": 2.53, "learning_rate": 4.367628498951414e-05, "loss": 3.2407, "step": 403000 }, { "epoch": 2.53, "learning_rate": 4.366843628737552e-05, "loss": 3.2356, "step": 403500 }, { "epoch": 2.54, "learning_rate": 4.366058758523691e-05, "loss": 3.2364, "step": 404000 }, { "epoch": 2.54, "learning_rate": 4.3652738883098296e-05, "loss": 3.2273, "step": 404500 }, { "epoch": 2.54, "learning_rate": 4.364490587836396e-05, "loss": 3.2377, "step": 405000 }, { "epoch": 2.55, "learning_rate": 4.3637057176225345e-05, "loss": 3.2301, "step": 405500 }, { "epoch": 2.55, "learning_rate": 4.362920847408673e-05, "loss": 3.2341, "step": 406000 }, { "epoch": 2.55, "learning_rate": 4.3621359771948115e-05, "loss": 3.235, "step": 406500 }, { "epoch": 2.56, "learning_rate": 4.3613526767213776e-05, "loss": 3.2323, "step": 407000 }, { "epoch": 2.56, "learning_rate": 4.3605678065075164e-05, "loss": 3.2263, "step": 407500 }, { "epoch": 2.56, "learning_rate": 4.3597829362936546e-05, "loss": 3.2259, "step": 408000 }, { "epoch": 2.56, "learning_rate": 4.358998066079793e-05, "loss": 3.2292, "step": 408500 }, { "epoch": 2.57, "learning_rate": 4.3582131958659315e-05, "loss": 3.2423, "step": 409000 }, { "epoch": 2.57, "learning_rate": 4.35742832565207e-05, "loss": 3.2383, "step": 409500 }, { "epoch": 2.57, "learning_rate": 4.3566450251786365e-05, "loss": 3.2268, "step": 410000 }, { "epoch": 2.58, "learning_rate": 4.355860154964775e-05, "loss": 3.2248, "step": 410500 }, { "epoch": 2.58, "learning_rate": 4.3550752847509134e-05, "loss": 3.2339, "step": 411000 }, { "epoch": 2.58, "learning_rate": 4.354290414537052e-05, "loss": 3.2251, "step": 411500 }, { "epoch": 2.59, "learning_rate": 4.3535071140636184e-05, "loss": 3.2322, "step": 412000 }, { "epoch": 2.59, "learning_rate": 4.352722243849757e-05, "loss": 3.2247, "step": 412500 }, { "epoch": 2.59, "learning_rate": 4.351937373635896e-05, "loss": 3.2311, "step": 413000 }, { "epoch": 2.6, "learning_rate": 4.351152503422034e-05, "loss": 3.2279, "step": 413500 }, { "epoch": 2.6, "learning_rate": 4.350367633208173e-05, "loss": 3.2266, "step": 414000 }, { "epoch": 2.6, "learning_rate": 4.349582762994312e-05, "loss": 3.2264, "step": 414500 }, { "epoch": 2.61, "learning_rate": 4.34879789278045e-05, "loss": 3.2313, "step": 415000 }, { "epoch": 2.61, "learning_rate": 4.348013022566589e-05, "loss": 3.2291, "step": 415500 }, { "epoch": 2.61, "learning_rate": 4.347229722093155e-05, "loss": 3.2298, "step": 416000 }, { "epoch": 2.62, "learning_rate": 4.346444851879293e-05, "loss": 3.2335, "step": 416500 }, { "epoch": 2.62, "learning_rate": 4.34566155140586e-05, "loss": 3.2265, "step": 417000 }, { "epoch": 2.62, "learning_rate": 4.344878250932426e-05, "loss": 3.2292, "step": 417500 }, { "epoch": 2.62, "learning_rate": 4.344093380718565e-05, "loss": 3.2221, "step": 418000 }, { "epoch": 2.63, "learning_rate": 4.3433085105047035e-05, "loss": 3.2283, "step": 418500 }, { "epoch": 2.63, "learning_rate": 4.342523640290842e-05, "loss": 3.2265, "step": 419000 }, { "epoch": 2.63, "learning_rate": 4.34173877007698e-05, "loss": 3.2304, "step": 419500 }, { "epoch": 2.64, "learning_rate": 4.340957039343974e-05, "loss": 3.2297, "step": 420000 }, { "epoch": 2.64, "eval_accuracy": 0.4306891776138236, "eval_loss": 3.1405692100524902, "eval_runtime": 1448.1366, "eval_samples_per_second": 57.868, "eval_steps_per_second": 5.787, "step": 420000 }, { "epoch": 2.64, "learning_rate": 4.340172169130113e-05, "loss": 3.2272, "step": 420500 }, { "epoch": 2.64, "learning_rate": 4.3393872989162516e-05, "loss": 3.2337, "step": 421000 }, { "epoch": 2.65, "learning_rate": 4.3386024287023904e-05, "loss": 3.2262, "step": 421500 }, { "epoch": 2.65, "learning_rate": 4.3378175584885285e-05, "loss": 3.2268, "step": 422000 }, { "epoch": 2.65, "learning_rate": 4.3370326882746673e-05, "loss": 3.2364, "step": 422500 }, { "epoch": 2.66, "learning_rate": 4.3362478180608055e-05, "loss": 3.2262, "step": 423000 }, { "epoch": 2.66, "learning_rate": 4.335462947846944e-05, "loss": 3.2236, "step": 423500 }, { "epoch": 2.66, "learning_rate": 4.3346780776330824e-05, "loss": 3.2296, "step": 424000 }, { "epoch": 2.67, "learning_rate": 4.333893207419221e-05, "loss": 3.2239, "step": 424500 }, { "epoch": 2.67, "learning_rate": 4.33310833720536e-05, "loss": 3.2242, "step": 425000 }, { "epoch": 2.67, "learning_rate": 4.332323466991499e-05, "loss": 3.2262, "step": 425500 }, { "epoch": 2.67, "learning_rate": 4.331540166518065e-05, "loss": 3.2219, "step": 426000 }, { "epoch": 2.68, "learning_rate": 4.330755296304203e-05, "loss": 3.2198, "step": 426500 }, { "epoch": 2.68, "learning_rate": 4.329970426090342e-05, "loss": 3.2267, "step": 427000 }, { "epoch": 2.68, "learning_rate": 4.329185555876481e-05, "loss": 3.2234, "step": 427500 }, { "epoch": 2.69, "learning_rate": 4.328400685662619e-05, "loss": 3.2247, "step": 428000 }, { "epoch": 2.69, "learning_rate": 4.327615815448758e-05, "loss": 3.2277, "step": 428500 }, { "epoch": 2.69, "learning_rate": 4.326830945234896e-05, "loss": 3.2329, "step": 429000 }, { "epoch": 2.7, "learning_rate": 4.326047644761462e-05, "loss": 3.2236, "step": 429500 }, { "epoch": 2.7, "learning_rate": 4.325262774547601e-05, "loss": 3.2193, "step": 430000 }, { "epoch": 2.7, "learning_rate": 4.3244779043337396e-05, "loss": 3.2229, "step": 430500 }, { "epoch": 2.71, "learning_rate": 4.3236930341198784e-05, "loss": 3.2245, "step": 431000 }, { "epoch": 2.71, "learning_rate": 4.3229081639060166e-05, "loss": 3.2264, "step": 431500 }, { "epoch": 2.71, "learning_rate": 4.3221232936921554e-05, "loss": 3.2165, "step": 432000 }, { "epoch": 2.72, "learning_rate": 4.321338423478294e-05, "loss": 3.2219, "step": 432500 }, { "epoch": 2.72, "learning_rate": 4.320553553264432e-05, "loss": 3.2243, "step": 433000 }, { "epoch": 2.72, "learning_rate": 4.319770252790999e-05, "loss": 3.2238, "step": 433500 }, { "epoch": 2.73, "learning_rate": 4.318985382577137e-05, "loss": 3.2199, "step": 434000 }, { "epoch": 2.73, "learning_rate": 4.3182005123632754e-05, "loss": 3.2181, "step": 434500 }, { "epoch": 2.73, "learning_rate": 4.317415642149414e-05, "loss": 3.2121, "step": 435000 }, { "epoch": 2.73, "learning_rate": 4.316630771935553e-05, "loss": 3.2224, "step": 435500 }, { "epoch": 2.74, "learning_rate": 4.315847471462119e-05, "loss": 3.2183, "step": 436000 }, { "epoch": 2.74, "learning_rate": 4.315062601248257e-05, "loss": 3.2214, "step": 436500 }, { "epoch": 2.74, "learning_rate": 4.314277731034396e-05, "loss": 3.2257, "step": 437000 }, { "epoch": 2.75, "learning_rate": 4.313492860820535e-05, "loss": 3.2255, "step": 437500 }, { "epoch": 2.75, "learning_rate": 4.312707990606674e-05, "loss": 3.2158, "step": 438000 }, { "epoch": 2.75, "learning_rate": 4.3119231203928126e-05, "loss": 3.2318, "step": 438500 }, { "epoch": 2.76, "learning_rate": 4.311138250178951e-05, "loss": 3.2203, "step": 439000 }, { "epoch": 2.76, "learning_rate": 4.310354949705517e-05, "loss": 3.2146, "step": 439500 }, { "epoch": 2.76, "learning_rate": 4.3095700794916556e-05, "loss": 3.2205, "step": 440000 }, { "epoch": 2.77, "learning_rate": 4.3087852092777945e-05, "loss": 3.2243, "step": 440500 }, { "epoch": 2.77, "learning_rate": 4.3080003390639326e-05, "loss": 3.2141, "step": 441000 }, { "epoch": 2.77, "learning_rate": 4.307215468850071e-05, "loss": 3.2142, "step": 441500 }, { "epoch": 2.78, "learning_rate": 4.3064305986362095e-05, "loss": 3.2189, "step": 442000 }, { "epoch": 2.78, "learning_rate": 4.305647298162776e-05, "loss": 3.2177, "step": 442500 }, { "epoch": 2.78, "learning_rate": 4.3048624279489145e-05, "loss": 3.2262, "step": 443000 }, { "epoch": 2.78, "learning_rate": 4.304077557735053e-05, "loss": 3.2186, "step": 443500 }, { "epoch": 2.79, "learning_rate": 4.3032926875211914e-05, "loss": 3.2207, "step": 444000 }, { "epoch": 2.79, "learning_rate": 4.30250781730733e-05, "loss": 3.2282, "step": 444500 }, { "epoch": 2.79, "learning_rate": 4.301722947093469e-05, "loss": 3.2137, "step": 445000 }, { "epoch": 2.8, "learning_rate": 4.300939646620035e-05, "loss": 3.2131, "step": 445500 }, { "epoch": 2.8, "learning_rate": 4.300154776406174e-05, "loss": 3.2202, "step": 446000 }, { "epoch": 2.8, "learning_rate": 4.299369906192312e-05, "loss": 3.2161, "step": 446500 }, { "epoch": 2.81, "learning_rate": 4.298585035978451e-05, "loss": 3.223, "step": 447000 }, { "epoch": 2.81, "learning_rate": 4.29780016576459e-05, "loss": 3.2124, "step": 447500 }, { "epoch": 2.81, "learning_rate": 4.297015295550728e-05, "loss": 3.216, "step": 448000 }, { "epoch": 2.82, "learning_rate": 4.296231995077295e-05, "loss": 3.2149, "step": 448500 }, { "epoch": 2.82, "learning_rate": 4.295447124863433e-05, "loss": 3.2121, "step": 449000 }, { "epoch": 2.82, "learning_rate": 4.294662254649571e-05, "loss": 3.216, "step": 449500 }, { "epoch": 2.83, "learning_rate": 4.29387738443571e-05, "loss": 3.2213, "step": 450000 }, { "epoch": 2.83, "eval_accuracy": 0.432139116801437, "eval_loss": 3.1286754608154297, "eval_runtime": 1444.4913, "eval_samples_per_second": 58.014, "eval_steps_per_second": 5.802, "step": 450000 }, { "epoch": 2.83, "learning_rate": 4.2930925142218486e-05, "loss": 3.2181, "step": 450500 }, { "epoch": 2.83, "learning_rate": 4.292309213748415e-05, "loss": 3.2126, "step": 451000 }, { "epoch": 2.83, "learning_rate": 4.291524343534553e-05, "loss": 3.2206, "step": 451500 }, { "epoch": 2.84, "learning_rate": 4.290739473320692e-05, "loss": 3.2216, "step": 452000 }, { "epoch": 2.84, "learning_rate": 4.2899546031068305e-05, "loss": 3.2166, "step": 452500 }, { "epoch": 2.84, "learning_rate": 4.289169732892969e-05, "loss": 3.2146, "step": 453000 }, { "epoch": 2.85, "learning_rate": 4.2883848626791075e-05, "loss": 3.2194, "step": 453500 }, { "epoch": 2.85, "learning_rate": 4.287599992465246e-05, "loss": 3.2181, "step": 454000 }, { "epoch": 2.85, "learning_rate": 4.2868151222513844e-05, "loss": 3.2066, "step": 454500 }, { "epoch": 2.86, "learning_rate": 4.286031821777951e-05, "loss": 3.2163, "step": 455000 }, { "epoch": 2.86, "learning_rate": 4.2852485213045174e-05, "loss": 3.2167, "step": 455500 }, { "epoch": 2.86, "learning_rate": 4.2844652208310835e-05, "loss": 3.2194, "step": 456000 }, { "epoch": 2.87, "learning_rate": 4.283680350617222e-05, "loss": 3.215, "step": 456500 }, { "epoch": 2.87, "learning_rate": 4.2828954804033605e-05, "loss": 3.2165, "step": 457000 }, { "epoch": 2.87, "learning_rate": 4.282110610189499e-05, "loss": 3.2177, "step": 457500 }, { "epoch": 2.88, "learning_rate": 4.281325739975638e-05, "loss": 3.2143, "step": 458000 }, { "epoch": 2.88, "learning_rate": 4.280540869761777e-05, "loss": 3.2182, "step": 458500 }, { "epoch": 2.88, "learning_rate": 4.279755999547915e-05, "loss": 3.2127, "step": 459000 }, { "epoch": 2.89, "learning_rate": 4.278971129334053e-05, "loss": 3.217, "step": 459500 }, { "epoch": 2.89, "learning_rate": 4.278186259120192e-05, "loss": 3.2183, "step": 460000 }, { "epoch": 2.89, "learning_rate": 4.277401388906331e-05, "loss": 3.2135, "step": 460500 }, { "epoch": 2.89, "learning_rate": 4.2766165186924696e-05, "loss": 3.2143, "step": 461000 }, { "epoch": 2.9, "learning_rate": 4.275833218219035e-05, "loss": 3.2128, "step": 461500 }, { "epoch": 2.9, "learning_rate": 4.275048348005174e-05, "loss": 3.2032, "step": 462000 }, { "epoch": 2.9, "learning_rate": 4.27426504753174e-05, "loss": 3.2131, "step": 462500 }, { "epoch": 2.91, "learning_rate": 4.273480177317879e-05, "loss": 3.2168, "step": 463000 }, { "epoch": 2.91, "learning_rate": 4.2726953071040176e-05, "loss": 3.2085, "step": 463500 }, { "epoch": 2.91, "learning_rate": 4.271910436890156e-05, "loss": 3.2097, "step": 464000 }, { "epoch": 2.92, "learning_rate": 4.2711255666762946e-05, "loss": 3.2127, "step": 464500 }, { "epoch": 2.92, "learning_rate": 4.2703406964624334e-05, "loss": 3.2111, "step": 465000 }, { "epoch": 2.92, "learning_rate": 4.2695558262485715e-05, "loss": 3.2149, "step": 465500 }, { "epoch": 2.93, "learning_rate": 4.2687725257751383e-05, "loss": 3.2133, "step": 466000 }, { "epoch": 2.93, "learning_rate": 4.2679876555612765e-05, "loss": 3.217, "step": 466500 }, { "epoch": 2.93, "learning_rate": 4.2672043550878426e-05, "loss": 3.2127, "step": 467000 }, { "epoch": 2.94, "learning_rate": 4.2664194848739814e-05, "loss": 3.2112, "step": 467500 }, { "epoch": 2.94, "learning_rate": 4.26563461466012e-05, "loss": 3.2145, "step": 468000 }, { "epoch": 2.94, "learning_rate": 4.2648513141866864e-05, "loss": 3.2158, "step": 468500 }, { "epoch": 2.94, "learning_rate": 4.264066443972825e-05, "loss": 3.2057, "step": 469000 }, { "epoch": 2.95, "learning_rate": 4.263281573758963e-05, "loss": 3.2126, "step": 469500 }, { "epoch": 2.95, "learning_rate": 4.262496703545102e-05, "loss": 3.2129, "step": 470000 }, { "epoch": 2.95, "learning_rate": 4.26171183333124e-05, "loss": 3.2136, "step": 470500 }, { "epoch": 2.96, "learning_rate": 4.260926963117379e-05, "loss": 3.2127, "step": 471000 }, { "epoch": 2.96, "learning_rate": 4.260142092903517e-05, "loss": 3.2146, "step": 471500 }, { "epoch": 2.96, "learning_rate": 4.259357222689656e-05, "loss": 3.2146, "step": 472000 }, { "epoch": 2.97, "learning_rate": 4.258572352475795e-05, "loss": 3.2095, "step": 472500 }, { "epoch": 2.97, "learning_rate": 4.257789052002361e-05, "loss": 3.2069, "step": 473000 }, { "epoch": 2.97, "learning_rate": 4.2570041817885e-05, "loss": 3.2028, "step": 473500 }, { "epoch": 2.98, "learning_rate": 4.2562193115746386e-05, "loss": 3.2116, "step": 474000 }, { "epoch": 2.98, "learning_rate": 4.255434441360777e-05, "loss": 3.204, "step": 474500 }, { "epoch": 2.98, "learning_rate": 4.2546495711469156e-05, "loss": 3.2116, "step": 475000 }, { "epoch": 2.99, "learning_rate": 4.253864700933054e-05, "loss": 3.207, "step": 475500 }, { "epoch": 2.99, "learning_rate": 4.2530798307191925e-05, "loss": 3.2093, "step": 476000 }, { "epoch": 2.99, "learning_rate": 4.2522949605053306e-05, "loss": 3.2075, "step": 476500 }, { "epoch": 3.0, "learning_rate": 4.2515100902914695e-05, "loss": 3.2141, "step": 477000 }, { "epoch": 3.0, "learning_rate": 4.250725220077608e-05, "loss": 3.2082, "step": 477500 }, { "epoch": 3.0, "learning_rate": 4.2499419196041744e-05, "loss": 3.2028, "step": 478000 }, { "epoch": 3.0, "learning_rate": 4.2491586191307405e-05, "loss": 3.1807, "step": 478500 }, { "epoch": 3.01, "learning_rate": 4.2483737489168794e-05, "loss": 3.1975, "step": 479000 }, { "epoch": 3.01, "learning_rate": 4.2475888787030175e-05, "loss": 3.1888, "step": 479500 }, { "epoch": 3.01, "learning_rate": 4.246804008489156e-05, "loss": 3.1885, "step": 480000 }, { "epoch": 3.01, "eval_accuracy": 0.43337159019010535, "eval_loss": 3.118159055709839, "eval_runtime": 1446.7147, "eval_samples_per_second": 57.925, "eval_steps_per_second": 5.793, "step": 480000 }, { "epoch": 3.02, "learning_rate": 4.246019138275295e-05, "loss": 3.1857, "step": 480500 }, { "epoch": 3.02, "learning_rate": 4.245234268061434e-05, "loss": 3.1849, "step": 481000 }, { "epoch": 3.02, "learning_rate": 4.244449397847572e-05, "loss": 3.191, "step": 481500 }, { "epoch": 3.03, "learning_rate": 4.243664527633711e-05, "loss": 3.1871, "step": 482000 }, { "epoch": 3.03, "learning_rate": 4.242879657419849e-05, "loss": 3.1896, "step": 482500 }, { "epoch": 3.03, "learning_rate": 4.242094787205988e-05, "loss": 3.1924, "step": 483000 }, { "epoch": 3.04, "learning_rate": 4.2413099169921266e-05, "loss": 3.1881, "step": 483500 }, { "epoch": 3.04, "learning_rate": 4.240525046778265e-05, "loss": 3.1933, "step": 484000 }, { "epoch": 3.04, "learning_rate": 4.2397401765644036e-05, "loss": 3.1915, "step": 484500 }, { "epoch": 3.05, "learning_rate": 4.2389553063505424e-05, "loss": 3.1869, "step": 485000 }, { "epoch": 3.05, "learning_rate": 4.2381720058771085e-05, "loss": 3.1892, "step": 485500 }, { "epoch": 3.05, "learning_rate": 4.237388705403675e-05, "loss": 3.1895, "step": 486000 }, { "epoch": 3.05, "learning_rate": 4.2366054049302415e-05, "loss": 3.1859, "step": 486500 }, { "epoch": 3.06, "learning_rate": 4.2358205347163796e-05, "loss": 3.1934, "step": 487000 }, { "epoch": 3.06, "learning_rate": 4.235035664502518e-05, "loss": 3.1865, "step": 487500 }, { "epoch": 3.06, "learning_rate": 4.2342507942886566e-05, "loss": 3.1937, "step": 488000 }, { "epoch": 3.07, "learning_rate": 4.233467493815223e-05, "loss": 3.1911, "step": 488500 }, { "epoch": 3.07, "learning_rate": 4.2326826236013615e-05, "loss": 3.1952, "step": 489000 }, { "epoch": 3.07, "learning_rate": 4.2318977533875e-05, "loss": 3.1991, "step": 489500 }, { "epoch": 3.08, "learning_rate": 4.2311128831736385e-05, "loss": 3.1839, "step": 490000 }, { "epoch": 3.08, "learning_rate": 4.230328012959777e-05, "loss": 3.1905, "step": 490500 }, { "epoch": 3.08, "learning_rate": 4.229543142745916e-05, "loss": 3.1912, "step": 491000 }, { "epoch": 3.09, "learning_rate": 4.228758272532054e-05, "loss": 3.2007, "step": 491500 }, { "epoch": 3.09, "learning_rate": 4.227973402318193e-05, "loss": 3.1898, "step": 492000 }, { "epoch": 3.09, "learning_rate": 4.227188532104331e-05, "loss": 3.1973, "step": 492500 }, { "epoch": 3.1, "learning_rate": 4.22640366189047e-05, "loss": 3.1941, "step": 493000 }, { "epoch": 3.1, "learning_rate": 4.225618791676609e-05, "loss": 3.1846, "step": 493500 }, { "epoch": 3.1, "learning_rate": 4.224833921462747e-05, "loss": 3.1949, "step": 494000 }, { "epoch": 3.1, "learning_rate": 4.224050620989313e-05, "loss": 3.1864, "step": 494500 }, { "epoch": 3.11, "learning_rate": 4.223268890256307e-05, "loss": 3.1948, "step": 495000 }, { "epoch": 3.11, "learning_rate": 4.222484020042446e-05, "loss": 3.179, "step": 495500 }, { "epoch": 3.11, "learning_rate": 4.221699149828585e-05, "loss": 3.1875, "step": 496000 }, { "epoch": 3.12, "learning_rate": 4.220914279614723e-05, "loss": 3.185, "step": 496500 }, { "epoch": 3.12, "learning_rate": 4.220129409400861e-05, "loss": 3.1996, "step": 497000 }, { "epoch": 3.12, "learning_rate": 4.219344539187e-05, "loss": 3.1897, "step": 497500 }, { "epoch": 3.13, "learning_rate": 4.218561238713567e-05, "loss": 3.1896, "step": 498000 }, { "epoch": 3.13, "learning_rate": 4.217776368499705e-05, "loss": 3.1901, "step": 498500 }, { "epoch": 3.13, "learning_rate": 4.216991498285844e-05, "loss": 3.193, "step": 499000 }, { "epoch": 3.14, "learning_rate": 4.216206628071982e-05, "loss": 3.1907, "step": 499500 }, { "epoch": 3.14, "learning_rate": 4.2154217578581206e-05, "loss": 3.1965, "step": 500000 }, { "epoch": 3.14, "learning_rate": 4.2146368876442595e-05, "loss": 3.1897, "step": 500500 }, { "epoch": 3.15, "learning_rate": 4.2138535871708256e-05, "loss": 3.2017, "step": 501000 }, { "epoch": 3.15, "learning_rate": 4.2130687169569644e-05, "loss": 3.1954, "step": 501500 }, { "epoch": 3.15, "learning_rate": 4.2122838467431025e-05, "loss": 3.1889, "step": 502000 }, { "epoch": 3.16, "learning_rate": 4.2114989765292414e-05, "loss": 3.1836, "step": 502500 }, { "epoch": 3.16, "learning_rate": 4.21071410631538e-05, "loss": 3.1956, "step": 503000 }, { "epoch": 3.16, "learning_rate": 4.209929236101518e-05, "loss": 3.1821, "step": 503500 }, { "epoch": 3.16, "learning_rate": 4.209144365887657e-05, "loss": 3.1924, "step": 504000 }, { "epoch": 3.17, "learning_rate": 4.208359495673795e-05, "loss": 3.1916, "step": 504500 }, { "epoch": 3.17, "learning_rate": 4.207574625459934e-05, "loss": 3.1874, "step": 505000 }, { "epoch": 3.17, "learning_rate": 4.206789755246073e-05, "loss": 3.1885, "step": 505500 }, { "epoch": 3.18, "learning_rate": 4.206004885032212e-05, "loss": 3.1928, "step": 506000 }, { "epoch": 3.18, "learning_rate": 4.20522001481835e-05, "loss": 3.1914, "step": 506500 }, { "epoch": 3.18, "learning_rate": 4.204435144604488e-05, "loss": 3.1944, "step": 507000 }, { "epoch": 3.19, "learning_rate": 4.203650274390627e-05, "loss": 3.1916, "step": 507500 }, { "epoch": 3.19, "learning_rate": 4.2028669739171936e-05, "loss": 3.1931, "step": 508000 }, { "epoch": 3.19, "learning_rate": 4.202082103703332e-05, "loss": 3.1927, "step": 508500 }, { "epoch": 3.2, "learning_rate": 4.20129723348947e-05, "loss": 3.196, "step": 509000 }, { "epoch": 3.2, "learning_rate": 4.200512363275609e-05, "loss": 3.191, "step": 509500 }, { "epoch": 3.2, "learning_rate": 4.1997274930617475e-05, "loss": 3.1895, "step": 510000 }, { "epoch": 3.2, "eval_accuracy": 0.4343495272273934, "eval_loss": 3.1100406646728516, "eval_runtime": 1453.8123, "eval_samples_per_second": 57.642, "eval_steps_per_second": 5.765, "step": 510000 }, { "epoch": 3.21, "learning_rate": 4.1989441925883136e-05, "loss": 3.1912, "step": 510500 }, { "epoch": 3.21, "learning_rate": 4.1981593223744524e-05, "loss": 3.1896, "step": 511000 }, { "epoch": 3.21, "learning_rate": 4.1973744521605906e-05, "loss": 3.1904, "step": 511500 }, { "epoch": 3.21, "learning_rate": 4.1965895819467294e-05, "loss": 3.1902, "step": 512000 }, { "epoch": 3.22, "learning_rate": 4.1958062814732955e-05, "loss": 3.1868, "step": 512500 }, { "epoch": 3.22, "learning_rate": 4.195021411259434e-05, "loss": 3.181, "step": 513000 }, { "epoch": 3.22, "learning_rate": 4.194236541045573e-05, "loss": 3.1927, "step": 513500 }, { "epoch": 3.23, "learning_rate": 4.193451670831711e-05, "loss": 3.1901, "step": 514000 }, { "epoch": 3.23, "learning_rate": 4.19266680061785e-05, "loss": 3.1862, "step": 514500 }, { "epoch": 3.23, "learning_rate": 4.191881930403989e-05, "loss": 3.1886, "step": 515000 }, { "epoch": 3.24, "learning_rate": 4.191097060190127e-05, "loss": 3.1869, "step": 515500 }, { "epoch": 3.24, "learning_rate": 4.190313759716694e-05, "loss": 3.1907, "step": 516000 }, { "epoch": 3.24, "learning_rate": 4.18953045924326e-05, "loss": 3.1859, "step": 516500 }, { "epoch": 3.25, "learning_rate": 4.188745589029398e-05, "loss": 3.184, "step": 517000 }, { "epoch": 3.25, "learning_rate": 4.187960718815537e-05, "loss": 3.1918, "step": 517500 }, { "epoch": 3.25, "learning_rate": 4.187175848601676e-05, "loss": 3.1926, "step": 518000 }, { "epoch": 3.26, "learning_rate": 4.186390978387814e-05, "loss": 3.1824, "step": 518500 }, { "epoch": 3.26, "learning_rate": 4.185606108173953e-05, "loss": 3.1891, "step": 519000 }, { "epoch": 3.26, "learning_rate": 4.184821237960091e-05, "loss": 3.1908, "step": 519500 }, { "epoch": 3.27, "learning_rate": 4.1840363677462297e-05, "loss": 3.1874, "step": 520000 }, { "epoch": 3.27, "learning_rate": 4.183253067272796e-05, "loss": 3.1924, "step": 520500 }, { "epoch": 3.27, "learning_rate": 4.1824681970589346e-05, "loss": 3.1827, "step": 521000 }, { "epoch": 3.27, "learning_rate": 4.1816833268450734e-05, "loss": 3.1927, "step": 521500 }, { "epoch": 3.28, "learning_rate": 4.1808984566312116e-05, "loss": 3.1873, "step": 522000 }, { "epoch": 3.28, "learning_rate": 4.180115156157778e-05, "loss": 3.1849, "step": 522500 }, { "epoch": 3.28, "learning_rate": 4.1793302859439165e-05, "loss": 3.1891, "step": 523000 }, { "epoch": 3.29, "learning_rate": 4.178545415730055e-05, "loss": 3.186, "step": 523500 }, { "epoch": 3.29, "learning_rate": 4.177760545516194e-05, "loss": 3.1824, "step": 524000 }, { "epoch": 3.29, "learning_rate": 4.176975675302332e-05, "loss": 3.1898, "step": 524500 }, { "epoch": 3.3, "learning_rate": 4.1761908050884704e-05, "loss": 3.1821, "step": 525000 }, { "epoch": 3.3, "learning_rate": 4.175405934874609e-05, "loss": 3.1847, "step": 525500 }, { "epoch": 3.3, "learning_rate": 4.174621064660748e-05, "loss": 3.1824, "step": 526000 }, { "epoch": 3.31, "learning_rate": 4.173836194446886e-05, "loss": 3.1875, "step": 526500 }, { "epoch": 3.31, "learning_rate": 4.173052893973452e-05, "loss": 3.1907, "step": 527000 }, { "epoch": 3.31, "learning_rate": 4.172268023759591e-05, "loss": 3.1948, "step": 527500 }, { "epoch": 3.32, "learning_rate": 4.17148315354573e-05, "loss": 3.1877, "step": 528000 }, { "epoch": 3.32, "learning_rate": 4.170698283331869e-05, "loss": 3.1851, "step": 528500 }, { "epoch": 3.32, "learning_rate": 4.169914982858435e-05, "loss": 3.1878, "step": 529000 }, { "epoch": 3.32, "learning_rate": 4.169130112644573e-05, "loss": 3.1874, "step": 529500 }, { "epoch": 3.33, "learning_rate": 4.168346812171139e-05, "loss": 3.1897, "step": 530000 }, { "epoch": 3.33, "learning_rate": 4.167563511697706e-05, "loss": 3.1851, "step": 530500 }, { "epoch": 3.33, "learning_rate": 4.166778641483845e-05, "loss": 3.1879, "step": 531000 }, { "epoch": 3.34, "learning_rate": 4.165993771269983e-05, "loss": 3.188, "step": 531500 }, { "epoch": 3.34, "learning_rate": 4.165208901056122e-05, "loss": 3.182, "step": 532000 }, { "epoch": 3.34, "learning_rate": 4.16442403084226e-05, "loss": 3.1868, "step": 532500 }, { "epoch": 3.35, "learning_rate": 4.163639160628399e-05, "loss": 3.1821, "step": 533000 }, { "epoch": 3.35, "learning_rate": 4.1628542904145375e-05, "loss": 3.1875, "step": 533500 }, { "epoch": 3.35, "learning_rate": 4.162069420200676e-05, "loss": 3.1874, "step": 534000 }, { "epoch": 3.36, "learning_rate": 4.1612845499868144e-05, "loss": 3.1846, "step": 534500 }, { "epoch": 3.36, "learning_rate": 4.1604996797729526e-05, "loss": 3.1855, "step": 535000 }, { "epoch": 3.36, "learning_rate": 4.1597163792995194e-05, "loss": 3.1855, "step": 535500 }, { "epoch": 3.37, "learning_rate": 4.158931509085658e-05, "loss": 3.1902, "step": 536000 }, { "epoch": 3.37, "learning_rate": 4.158146638871796e-05, "loss": 3.188, "step": 536500 }, { "epoch": 3.37, "learning_rate": 4.1573617686579345e-05, "loss": 3.1865, "step": 537000 }, { "epoch": 3.37, "learning_rate": 4.156576898444073e-05, "loss": 3.1867, "step": 537500 }, { "epoch": 3.38, "learning_rate": 4.155792028230212e-05, "loss": 3.1858, "step": 538000 }, { "epoch": 3.38, "learning_rate": 4.155007158016351e-05, "loss": 3.1825, "step": 538500 }, { "epoch": 3.38, "learning_rate": 4.15422228780249e-05, "loss": 3.1864, "step": 539000 }, { "epoch": 3.39, "learning_rate": 4.153437417588628e-05, "loss": 3.178, "step": 539500 }, { "epoch": 3.39, "learning_rate": 4.152652547374766e-05, "loss": 3.1816, "step": 540000 }, { "epoch": 3.39, "eval_accuracy": 0.43550916596237865, "eval_loss": 3.102531909942627, "eval_runtime": 1450.3986, "eval_samples_per_second": 57.778, "eval_steps_per_second": 5.778, "step": 540000 }, { "epoch": 3.39, "learning_rate": 4.151869246901333e-05, "loss": 3.1818, "step": 540500 }, { "epoch": 3.4, "learning_rate": 4.151085946427899e-05, "loss": 3.1906, "step": 541000 }, { "epoch": 3.4, "learning_rate": 4.150301076214038e-05, "loss": 3.1925, "step": 541500 }, { "epoch": 3.4, "learning_rate": 4.149516206000176e-05, "loss": 3.1768, "step": 542000 }, { "epoch": 3.41, "learning_rate": 4.148731335786315e-05, "loss": 3.1829, "step": 542500 }, { "epoch": 3.41, "learning_rate": 4.147946465572453e-05, "loss": 3.1817, "step": 543000 }, { "epoch": 3.41, "learning_rate": 4.1471615953585916e-05, "loss": 3.1827, "step": 543500 }, { "epoch": 3.42, "learning_rate": 4.1463767251447305e-05, "loss": 3.1844, "step": 544000 }, { "epoch": 3.42, "learning_rate": 4.1455934246712966e-05, "loss": 3.1851, "step": 544500 }, { "epoch": 3.42, "learning_rate": 4.144808554457435e-05, "loss": 3.1817, "step": 545000 }, { "epoch": 3.43, "learning_rate": 4.1440252539840015e-05, "loss": 3.1871, "step": 545500 }, { "epoch": 3.43, "learning_rate": 4.14324038377014e-05, "loss": 3.181, "step": 546000 }, { "epoch": 3.43, "learning_rate": 4.1424555135562785e-05, "loss": 3.1806, "step": 546500 }, { "epoch": 3.43, "learning_rate": 4.1416706433424166e-05, "loss": 3.1843, "step": 547000 }, { "epoch": 3.44, "learning_rate": 4.1408857731285554e-05, "loss": 3.1864, "step": 547500 }, { "epoch": 3.44, "learning_rate": 4.1401024726551216e-05, "loss": 3.1822, "step": 548000 }, { "epoch": 3.44, "learning_rate": 4.1393176024412604e-05, "loss": 3.1828, "step": 548500 }, { "epoch": 3.45, "learning_rate": 4.138532732227399e-05, "loss": 3.1831, "step": 549000 }, { "epoch": 3.45, "learning_rate": 4.1377478620135373e-05, "loss": 3.1817, "step": 549500 }, { "epoch": 3.45, "learning_rate": 4.136962991799676e-05, "loss": 3.184, "step": 550000 }, { "epoch": 3.46, "learning_rate": 4.136178121585815e-05, "loss": 3.1804, "step": 550500 }, { "epoch": 3.46, "learning_rate": 4.135393251371953e-05, "loss": 3.1831, "step": 551000 }, { "epoch": 3.46, "learning_rate": 4.134608381158092e-05, "loss": 3.1853, "step": 551500 }, { "epoch": 3.47, "learning_rate": 4.13382351094423e-05, "loss": 3.183, "step": 552000 }, { "epoch": 3.47, "learning_rate": 4.133038640730369e-05, "loss": 3.1787, "step": 552500 }, { "epoch": 3.47, "learning_rate": 4.132253770516508e-05, "loss": 3.1883, "step": 553000 }, { "epoch": 3.48, "learning_rate": 4.1314689003026465e-05, "loss": 3.1852, "step": 553500 }, { "epoch": 3.48, "learning_rate": 4.1306855998292126e-05, "loss": 3.1787, "step": 554000 }, { "epoch": 3.48, "learning_rate": 4.129902299355779e-05, "loss": 3.181, "step": 554500 }, { "epoch": 3.48, "learning_rate": 4.129117429141917e-05, "loss": 3.179, "step": 555000 }, { "epoch": 3.49, "learning_rate": 4.128332558928056e-05, "loss": 3.1775, "step": 555500 }, { "epoch": 3.49, "learning_rate": 4.1275476887141945e-05, "loss": 3.1825, "step": 556000 }, { "epoch": 3.49, "learning_rate": 4.1267628185003333e-05, "loss": 3.1767, "step": 556500 }, { "epoch": 3.5, "learning_rate": 4.1259779482864715e-05, "loss": 3.1809, "step": 557000 }, { "epoch": 3.5, "learning_rate": 4.1251946478130376e-05, "loss": 3.1805, "step": 557500 }, { "epoch": 3.5, "learning_rate": 4.1244097775991764e-05, "loss": 3.1742, "step": 558000 }, { "epoch": 3.51, "learning_rate": 4.123624907385315e-05, "loss": 3.1821, "step": 558500 }, { "epoch": 3.51, "learning_rate": 4.122840037171454e-05, "loss": 3.1817, "step": 559000 }, { "epoch": 3.51, "learning_rate": 4.122055166957592e-05, "loss": 3.1881, "step": 559500 }, { "epoch": 3.52, "learning_rate": 4.12127029674373e-05, "loss": 3.1844, "step": 560000 }, { "epoch": 3.52, "learning_rate": 4.120485426529869e-05, "loss": 3.1768, "step": 560500 }, { "epoch": 3.52, "learning_rate": 4.119700556316008e-05, "loss": 3.183, "step": 561000 }, { "epoch": 3.53, "learning_rate": 4.118915686102147e-05, "loss": 3.1843, "step": 561500 }, { "epoch": 3.53, "learning_rate": 4.118130815888285e-05, "loss": 3.1804, "step": 562000 }, { "epoch": 3.53, "learning_rate": 4.117347515414851e-05, "loss": 3.185, "step": 562500 }, { "epoch": 3.54, "learning_rate": 4.11656264520099e-05, "loss": 3.1792, "step": 563000 }, { "epoch": 3.54, "learning_rate": 4.1157777749871287e-05, "loss": 3.1854, "step": 563500 }, { "epoch": 3.54, "learning_rate": 4.1149929047732675e-05, "loss": 3.1803, "step": 564000 }, { "epoch": 3.54, "learning_rate": 4.1142080345594056e-05, "loss": 3.1812, "step": 564500 }, { "epoch": 3.55, "learning_rate": 4.113423164345544e-05, "loss": 3.1778, "step": 565000 }, { "epoch": 3.55, "learning_rate": 4.1126382941316826e-05, "loss": 3.1786, "step": 565500 }, { "epoch": 3.55, "learning_rate": 4.1118534239178214e-05, "loss": 3.1784, "step": 566000 }, { "epoch": 3.56, "learning_rate": 4.1110701234443875e-05, "loss": 3.182, "step": 566500 }, { "epoch": 3.56, "learning_rate": 4.1102852532305256e-05, "loss": 3.1825, "step": 567000 }, { "epoch": 3.56, "learning_rate": 4.1095003830166645e-05, "loss": 3.1776, "step": 567500 }, { "epoch": 3.57, "learning_rate": 4.108715512802803e-05, "loss": 3.1848, "step": 568000 }, { "epoch": 3.57, "learning_rate": 4.107930642588942e-05, "loss": 3.1748, "step": 568500 }, { "epoch": 3.57, "learning_rate": 4.1071504815963635e-05, "loss": 3.178, "step": 569000 }, { "epoch": 3.58, "learning_rate": 4.1063656113825024e-05, "loss": 3.1805, "step": 569500 }, { "epoch": 3.58, "learning_rate": 4.1055807411686405e-05, "loss": 3.1786, "step": 570000 }, { "epoch": 3.58, "eval_accuracy": 0.436555493386365, "eval_loss": 3.0939366817474365, "eval_runtime": 1450.5705, "eval_samples_per_second": 57.771, "eval_steps_per_second": 5.778, "step": 570000 }, { "epoch": 3.58, "learning_rate": 4.104795870954779e-05, "loss": 3.1761, "step": 570500 }, { "epoch": 3.59, "learning_rate": 4.1040110007409174e-05, "loss": 3.1845, "step": 571000 }, { "epoch": 3.59, "learning_rate": 4.103226130527056e-05, "loss": 3.177, "step": 571500 }, { "epoch": 3.59, "learning_rate": 4.1024412603131944e-05, "loss": 3.1744, "step": 572000 }, { "epoch": 3.59, "learning_rate": 4.101656390099333e-05, "loss": 3.1776, "step": 572500 }, { "epoch": 3.6, "learning_rate": 4.100871519885472e-05, "loss": 3.1789, "step": 573000 }, { "epoch": 3.6, "learning_rate": 4.100086649671611e-05, "loss": 3.1797, "step": 573500 }, { "epoch": 3.6, "learning_rate": 4.099303349198177e-05, "loss": 3.184, "step": 574000 }, { "epoch": 3.61, "learning_rate": 4.098518478984316e-05, "loss": 3.1791, "step": 574500 }, { "epoch": 3.61, "learning_rate": 4.097733608770454e-05, "loss": 3.1737, "step": 575000 }, { "epoch": 3.61, "learning_rate": 4.096948738556593e-05, "loss": 3.1789, "step": 575500 }, { "epoch": 3.62, "learning_rate": 4.096165438083159e-05, "loss": 3.1731, "step": 576000 }, { "epoch": 3.62, "learning_rate": 4.095382137609725e-05, "loss": 3.1746, "step": 576500 }, { "epoch": 3.62, "learning_rate": 4.094597267395864e-05, "loss": 3.1774, "step": 577000 }, { "epoch": 3.63, "learning_rate": 4.093812397182002e-05, "loss": 3.1741, "step": 577500 }, { "epoch": 3.63, "learning_rate": 4.093027526968141e-05, "loss": 3.1776, "step": 578000 }, { "epoch": 3.63, "learning_rate": 4.0922426567542796e-05, "loss": 3.1742, "step": 578500 }, { "epoch": 3.64, "learning_rate": 4.091457786540418e-05, "loss": 3.1776, "step": 579000 }, { "epoch": 3.64, "learning_rate": 4.0906744860669845e-05, "loss": 3.1818, "step": 579500 }, { "epoch": 3.64, "learning_rate": 4.0898896158531227e-05, "loss": 3.1755, "step": 580000 }, { "epoch": 3.64, "learning_rate": 4.0891047456392615e-05, "loss": 3.1757, "step": 580500 }, { "epoch": 3.65, "learning_rate": 4.0883198754253996e-05, "loss": 3.1821, "step": 581000 }, { "epoch": 3.65, "learning_rate": 4.0875350052115384e-05, "loss": 3.1804, "step": 581500 }, { "epoch": 3.65, "learning_rate": 4.086750134997677e-05, "loss": 3.1757, "step": 582000 }, { "epoch": 3.66, "learning_rate": 4.0859652647838154e-05, "loss": 3.1684, "step": 582500 }, { "epoch": 3.66, "learning_rate": 4.085180394569954e-05, "loss": 3.1802, "step": 583000 }, { "epoch": 3.66, "learning_rate": 4.084395524356093e-05, "loss": 3.1731, "step": 583500 }, { "epoch": 3.67, "learning_rate": 4.083612223882659e-05, "loss": 3.1835, "step": 584000 }, { "epoch": 3.67, "learning_rate": 4.082827353668798e-05, "loss": 3.1811, "step": 584500 }, { "epoch": 3.67, "learning_rate": 4.082042483454936e-05, "loss": 3.1676, "step": 585000 }, { "epoch": 3.68, "learning_rate": 4.081257613241075e-05, "loss": 3.1805, "step": 585500 }, { "epoch": 3.68, "learning_rate": 4.080472743027213e-05, "loss": 3.1741, "step": 586000 }, { "epoch": 3.68, "learning_rate": 4.07968944255378e-05, "loss": 3.1724, "step": 586500 }, { "epoch": 3.69, "learning_rate": 4.078904572339918e-05, "loss": 3.171, "step": 587000 }, { "epoch": 3.69, "learning_rate": 4.078119702126056e-05, "loss": 3.1766, "step": 587500 }, { "epoch": 3.69, "learning_rate": 4.077334831912195e-05, "loss": 3.1812, "step": 588000 }, { "epoch": 3.7, "learning_rate": 4.076549961698334e-05, "loss": 3.1752, "step": 588500 }, { "epoch": 3.7, "learning_rate": 4.0757666612249e-05, "loss": 3.1781, "step": 589000 }, { "epoch": 3.7, "learning_rate": 4.074981791011039e-05, "loss": 3.1696, "step": 589500 }, { "epoch": 3.7, "learning_rate": 4.074198490537605e-05, "loss": 3.1747, "step": 590000 }, { "epoch": 3.71, "learning_rate": 4.073413620323743e-05, "loss": 3.166, "step": 590500 }, { "epoch": 3.71, "learning_rate": 4.072628750109882e-05, "loss": 3.1771, "step": 591000 }, { "epoch": 3.71, "learning_rate": 4.0718438798960206e-05, "loss": 3.1746, "step": 591500 }, { "epoch": 3.72, "learning_rate": 4.0710590096821594e-05, "loss": 3.1723, "step": 592000 }, { "epoch": 3.72, "learning_rate": 4.0702741394682975e-05, "loss": 3.1746, "step": 592500 }, { "epoch": 3.72, "learning_rate": 4.0694892692544363e-05, "loss": 3.1728, "step": 593000 }, { "epoch": 3.73, "learning_rate": 4.0687059687810025e-05, "loss": 3.1782, "step": 593500 }, { "epoch": 3.73, "learning_rate": 4.067921098567141e-05, "loss": 3.1785, "step": 594000 }, { "epoch": 3.73, "learning_rate": 4.06713622835328e-05, "loss": 3.177, "step": 594500 }, { "epoch": 3.74, "learning_rate": 4.066351358139418e-05, "loss": 3.1815, "step": 595000 }, { "epoch": 3.74, "learning_rate": 4.065566487925557e-05, "loss": 3.171, "step": 595500 }, { "epoch": 3.74, "learning_rate": 4.064783187452123e-05, "loss": 3.1686, "step": 596000 }, { "epoch": 3.75, "learning_rate": 4.063998317238262e-05, "loss": 3.1717, "step": 596500 }, { "epoch": 3.75, "learning_rate": 4.0632134470244e-05, "loss": 3.1734, "step": 597000 }, { "epoch": 3.75, "learning_rate": 4.062428576810538e-05, "loss": 3.1757, "step": 597500 }, { "epoch": 3.75, "learning_rate": 4.061643706596677e-05, "loss": 3.1779, "step": 598000 }, { "epoch": 3.76, "learning_rate": 4.060858836382816e-05, "loss": 3.179, "step": 598500 }, { "epoch": 3.76, "learning_rate": 4.060073966168955e-05, "loss": 3.1706, "step": 599000 }, { "epoch": 3.76, "learning_rate": 4.0592890959550935e-05, "loss": 3.1754, "step": 599500 }, { "epoch": 3.77, "learning_rate": 4.058504225741232e-05, "loss": 3.178, "step": 600000 }, { "epoch": 3.77, "eval_accuracy": 0.43737965412008106, "eval_loss": 3.08634877204895, "eval_runtime": 1449.111, "eval_samples_per_second": 57.829, "eval_steps_per_second": 5.784, "step": 600000 }, { "epoch": 3.77, "learning_rate": 4.057720925267798e-05, "loss": 3.1718, "step": 600500 }, { "epoch": 3.77, "learning_rate": 4.0569360550539366e-05, "loss": 3.1722, "step": 601000 }, { "epoch": 3.78, "learning_rate": 4.0561511848400754e-05, "loss": 3.1734, "step": 601500 }, { "epoch": 3.78, "learning_rate": 4.0553663146262136e-05, "loss": 3.18, "step": 602000 }, { "epoch": 3.78, "learning_rate": 4.05458301415278e-05, "loss": 3.1706, "step": 602500 }, { "epoch": 3.79, "learning_rate": 4.0537981439389185e-05, "loss": 3.1732, "step": 603000 }, { "epoch": 3.79, "learning_rate": 4.053013273725057e-05, "loss": 3.1712, "step": 603500 }, { "epoch": 3.79, "learning_rate": 4.0522284035111955e-05, "loss": 3.1694, "step": 604000 }, { "epoch": 3.8, "learning_rate": 4.051445103037762e-05, "loss": 3.1704, "step": 604500 }, { "epoch": 3.8, "learning_rate": 4.0506618025643284e-05, "loss": 3.1663, "step": 605000 }, { "epoch": 3.8, "learning_rate": 4.0498769323504665e-05, "loss": 3.1708, "step": 605500 }, { "epoch": 3.81, "learning_rate": 4.0490920621366054e-05, "loss": 3.1694, "step": 606000 }, { "epoch": 3.81, "learning_rate": 4.048307191922744e-05, "loss": 3.173, "step": 606500 }, { "epoch": 3.81, "learning_rate": 4.047522321708882e-05, "loss": 3.169, "step": 607000 }, { "epoch": 3.81, "learning_rate": 4.0467374514950204e-05, "loss": 3.1773, "step": 607500 }, { "epoch": 3.82, "learning_rate": 4.045952581281159e-05, "loss": 3.1656, "step": 608000 }, { "epoch": 3.82, "learning_rate": 4.045167711067298e-05, "loss": 3.1665, "step": 608500 }, { "epoch": 3.82, "learning_rate": 4.044382840853437e-05, "loss": 3.168, "step": 609000 }, { "epoch": 3.83, "learning_rate": 4.043599540380003e-05, "loss": 3.1676, "step": 609500 }, { "epoch": 3.83, "learning_rate": 4.042814670166142e-05, "loss": 3.1718, "step": 610000 }, { "epoch": 3.83, "learning_rate": 4.04202979995228e-05, "loss": 3.1731, "step": 610500 }, { "epoch": 3.84, "learning_rate": 4.041244929738419e-05, "loss": 3.1748, "step": 611000 }, { "epoch": 3.84, "learning_rate": 4.0404600595245576e-05, "loss": 3.1669, "step": 611500 }, { "epoch": 3.84, "learning_rate": 4.039675189310696e-05, "loss": 3.1712, "step": 612000 }, { "epoch": 3.85, "learning_rate": 4.0388918888372625e-05, "loss": 3.1674, "step": 612500 }, { "epoch": 3.85, "learning_rate": 4.038107018623401e-05, "loss": 3.1678, "step": 613000 }, { "epoch": 3.85, "learning_rate": 4.037323718149967e-05, "loss": 3.1732, "step": 613500 }, { "epoch": 3.86, "learning_rate": 4.0365388479361056e-05, "loss": 3.1706, "step": 614000 }, { "epoch": 3.86, "learning_rate": 4.0357539777222444e-05, "loss": 3.1658, "step": 614500 }, { "epoch": 3.86, "learning_rate": 4.0349706772488106e-05, "loss": 3.1696, "step": 615000 }, { "epoch": 3.86, "learning_rate": 4.034185807034949e-05, "loss": 3.1724, "step": 615500 }, { "epoch": 3.87, "learning_rate": 4.0334009368210875e-05, "loss": 3.1687, "step": 616000 }, { "epoch": 3.87, "learning_rate": 4.0326160666072263e-05, "loss": 3.172, "step": 616500 }, { "epoch": 3.87, "learning_rate": 4.0318327661337925e-05, "loss": 3.1717, "step": 617000 }, { "epoch": 3.88, "learning_rate": 4.031047895919931e-05, "loss": 3.1712, "step": 617500 }, { "epoch": 3.88, "learning_rate": 4.0302630257060694e-05, "loss": 3.1666, "step": 618000 }, { "epoch": 3.88, "learning_rate": 4.0294781554922076e-05, "loss": 3.1751, "step": 618500 }, { "epoch": 3.89, "learning_rate": 4.0286932852783464e-05, "loss": 3.1765, "step": 619000 }, { "epoch": 3.89, "learning_rate": 4.027908415064485e-05, "loss": 3.1644, "step": 619500 }, { "epoch": 3.89, "learning_rate": 4.027123544850624e-05, "loss": 3.1703, "step": 620000 }, { "epoch": 3.9, "learning_rate": 4.026338674636762e-05, "loss": 3.1735, "step": 620500 }, { "epoch": 3.9, "learning_rate": 4.025553804422901e-05, "loss": 3.1747, "step": 621000 }, { "epoch": 3.9, "learning_rate": 4.02476893420904e-05, "loss": 3.1663, "step": 621500 }, { "epoch": 3.91, "learning_rate": 4.023984063995178e-05, "loss": 3.1686, "step": 622000 }, { "epoch": 3.91, "learning_rate": 4.023199193781316e-05, "loss": 3.1662, "step": 622500 }, { "epoch": 3.91, "learning_rate": 4.022414323567455e-05, "loss": 3.1687, "step": 623000 }, { "epoch": 3.91, "learning_rate": 4.0216294533535937e-05, "loss": 3.1713, "step": 623500 }, { "epoch": 3.92, "learning_rate": 4.0208445831397325e-05, "loss": 3.1633, "step": 624000 }, { "epoch": 3.92, "learning_rate": 4.020059712925871e-05, "loss": 3.1712, "step": 624500 }, { "epoch": 3.92, "learning_rate": 4.0192748427120094e-05, "loss": 3.1691, "step": 625000 }, { "epoch": 3.93, "learning_rate": 4.0184915422385756e-05, "loss": 3.1694, "step": 625500 }, { "epoch": 3.93, "learning_rate": 4.0177066720247144e-05, "loss": 3.1657, "step": 626000 }, { "epoch": 3.93, "learning_rate": 4.0169233715512805e-05, "loss": 3.1648, "step": 626500 }, { "epoch": 3.94, "learning_rate": 4.0161400710778466e-05, "loss": 3.1727, "step": 627000 }, { "epoch": 3.94, "learning_rate": 4.0153552008639855e-05, "loss": 3.174, "step": 627500 }, { "epoch": 3.94, "learning_rate": 4.0145703306501236e-05, "loss": 3.1634, "step": 628000 }, { "epoch": 3.95, "learning_rate": 4.01378703017669e-05, "loss": 3.1725, "step": 628500 }, { "epoch": 3.95, "learning_rate": 4.0130021599628285e-05, "loss": 3.1717, "step": 629000 }, { "epoch": 3.95, "learning_rate": 4.0122172897489674e-05, "loss": 3.1711, "step": 629500 }, { "epoch": 3.96, "learning_rate": 4.011432419535106e-05, "loss": 3.174, "step": 630000 }, { "epoch": 3.96, "eval_accuracy": 0.438145829417625, "eval_loss": 3.079467535018921, "eval_runtime": 1450.8203, "eval_samples_per_second": 57.761, "eval_steps_per_second": 5.777, "step": 630000 }, { "epoch": 3.96, "learning_rate": 4.010647549321244e-05, "loss": 3.1673, "step": 630500 }, { "epoch": 3.96, "learning_rate": 4.009862679107383e-05, "loss": 3.1676, "step": 631000 }, { "epoch": 3.97, "learning_rate": 4.009077808893521e-05, "loss": 3.1759, "step": 631500 }, { "epoch": 3.97, "learning_rate": 4.00829293867966e-05, "loss": 3.1624, "step": 632000 }, { "epoch": 3.97, "learning_rate": 4.007508068465799e-05, "loss": 3.1681, "step": 632500 }, { "epoch": 3.97, "learning_rate": 4.006723198251937e-05, "loss": 3.1685, "step": 633000 }, { "epoch": 3.98, "learning_rate": 4.005938328038076e-05, "loss": 3.1689, "step": 633500 }, { "epoch": 3.98, "learning_rate": 4.0051534578242146e-05, "loss": 3.1714, "step": 634000 }, { "epoch": 3.98, "learning_rate": 4.004370157350781e-05, "loss": 3.1666, "step": 634500 }, { "epoch": 3.99, "learning_rate": 4.0035852871369196e-05, "loss": 3.1708, "step": 635000 }, { "epoch": 3.99, "learning_rate": 4.002800416923058e-05, "loss": 3.1666, "step": 635500 }, { "epoch": 3.99, "learning_rate": 4.0020155467091965e-05, "loss": 3.1675, "step": 636000 }, { "epoch": 4.0, "learning_rate": 4.001230676495335e-05, "loss": 3.1674, "step": 636500 }, { "epoch": 4.0, "learning_rate": 4.0004458062814735e-05, "loss": 3.1662, "step": 637000 }, { "epoch": 4.0, "learning_rate": 3.9996609360676116e-05, "loss": 3.1484, "step": 637500 }, { "epoch": 4.01, "learning_rate": 3.9988760658537504e-05, "loss": 3.1488, "step": 638000 }, { "epoch": 4.01, "learning_rate": 3.998091195639889e-05, "loss": 3.1456, "step": 638500 }, { "epoch": 4.01, "learning_rate": 3.997306325426028e-05, "loss": 3.1456, "step": 639000 }, { "epoch": 4.02, "learning_rate": 3.996521455212166e-05, "loss": 3.1507, "step": 639500 }, { "epoch": 4.02, "learning_rate": 3.995736584998305e-05, "loss": 3.1469, "step": 640000 }, { "epoch": 4.02, "learning_rate": 3.994953284524871e-05, "loss": 3.1444, "step": 640500 }, { "epoch": 4.02, "learning_rate": 3.99416841431101e-05, "loss": 3.1418, "step": 641000 }, { "epoch": 4.03, "learning_rate": 3.993383544097148e-05, "loss": 3.1493, "step": 641500 }, { "epoch": 4.03, "learning_rate": 3.992598673883287e-05, "loss": 3.1475, "step": 642000 }, { "epoch": 4.03, "learning_rate": 3.991815373409853e-05, "loss": 3.1458, "step": 642500 }, { "epoch": 4.04, "learning_rate": 3.991030503195992e-05, "loss": 3.1438, "step": 643000 }, { "epoch": 4.04, "learning_rate": 3.99024563298213e-05, "loss": 3.1445, "step": 643500 }, { "epoch": 4.04, "learning_rate": 3.989460762768269e-05, "loss": 3.143, "step": 644000 }, { "epoch": 4.05, "learning_rate": 3.9886758925544076e-05, "loss": 3.151, "step": 644500 }, { "epoch": 4.05, "learning_rate": 3.987891022340546e-05, "loss": 3.1489, "step": 645000 }, { "epoch": 4.05, "learning_rate": 3.9871061521266846e-05, "loss": 3.1534, "step": 645500 }, { "epoch": 4.06, "learning_rate": 3.9863212819128234e-05, "loss": 3.1523, "step": 646000 }, { "epoch": 4.06, "learning_rate": 3.985536411698962e-05, "loss": 3.1526, "step": 646500 }, { "epoch": 4.06, "learning_rate": 3.9847515414851e-05, "loss": 3.1537, "step": 647000 }, { "epoch": 4.07, "learning_rate": 3.9839682410116665e-05, "loss": 3.1473, "step": 647500 }, { "epoch": 4.07, "learning_rate": 3.983183370797805e-05, "loss": 3.1458, "step": 648000 }, { "epoch": 4.07, "learning_rate": 3.9824000703243714e-05, "loss": 3.1496, "step": 648500 }, { "epoch": 4.08, "learning_rate": 3.98161520011051e-05, "loss": 3.1464, "step": 649000 }, { "epoch": 4.08, "learning_rate": 3.980830329896649e-05, "loss": 3.1547, "step": 649500 }, { "epoch": 4.08, "learning_rate": 3.980045459682787e-05, "loss": 3.151, "step": 650000 }, { "epoch": 4.08, "learning_rate": 3.979260589468925e-05, "loss": 3.1499, "step": 650500 }, { "epoch": 4.09, "learning_rate": 3.978475719255064e-05, "loss": 3.1456, "step": 651000 }, { "epoch": 4.09, "learning_rate": 3.977690849041203e-05, "loss": 3.1463, "step": 651500 }, { "epoch": 4.09, "learning_rate": 3.976905978827341e-05, "loss": 3.1514, "step": 652000 }, { "epoch": 4.1, "learning_rate": 3.976122678353907e-05, "loss": 3.1451, "step": 652500 }, { "epoch": 4.1, "learning_rate": 3.975337808140046e-05, "loss": 3.154, "step": 653000 }, { "epoch": 4.1, "learning_rate": 3.974552937926185e-05, "loss": 3.1474, "step": 653500 }, { "epoch": 4.11, "learning_rate": 3.973769637452751e-05, "loss": 3.1504, "step": 654000 }, { "epoch": 4.11, "learning_rate": 3.972986336979317e-05, "loss": 3.1493, "step": 654500 }, { "epoch": 4.11, "learning_rate": 3.972201466765456e-05, "loss": 3.1468, "step": 655000 }, { "epoch": 4.12, "learning_rate": 3.971416596551594e-05, "loss": 3.1533, "step": 655500 }, { "epoch": 4.12, "learning_rate": 3.970631726337733e-05, "loss": 3.1464, "step": 656000 }, { "epoch": 4.12, "learning_rate": 3.969848425864299e-05, "loss": 3.1541, "step": 656500 }, { "epoch": 4.13, "learning_rate": 3.969063555650438e-05, "loss": 3.1537, "step": 657000 }, { "epoch": 4.13, "learning_rate": 3.9682786854365766e-05, "loss": 3.1531, "step": 657500 }, { "epoch": 4.13, "learning_rate": 3.967493815222715e-05, "loss": 3.1498, "step": 658000 }, { "epoch": 4.13, "learning_rate": 3.9667089450088536e-05, "loss": 3.1477, "step": 658500 }, { "epoch": 4.14, "learning_rate": 3.9659240747949924e-05, "loss": 3.1492, "step": 659000 }, { "epoch": 4.14, "learning_rate": 3.9651392045811305e-05, "loss": 3.1524, "step": 659500 }, { "epoch": 4.14, "learning_rate": 3.964354334367269e-05, "loss": 3.148, "step": 660000 }, { "epoch": 4.14, "eval_accuracy": 0.43898404616634557, "eval_loss": 3.0737648010253906, "eval_runtime": 1450.7483, "eval_samples_per_second": 57.764, "eval_steps_per_second": 5.777, "step": 660000 }, { "epoch": 4.15, "learning_rate": 3.9635694641534075e-05, "loss": 3.1486, "step": 660500 }, { "epoch": 4.15, "learning_rate": 3.962784593939546e-05, "loss": 3.1386, "step": 661000 }, { "epoch": 4.15, "learning_rate": 3.9620012934661124e-05, "loss": 3.153, "step": 661500 }, { "epoch": 4.16, "learning_rate": 3.961216423252251e-05, "loss": 3.1526, "step": 662000 }, { "epoch": 4.16, "learning_rate": 3.9604315530383894e-05, "loss": 3.1539, "step": 662500 }, { "epoch": 4.16, "learning_rate": 3.959646682824528e-05, "loss": 3.1465, "step": 663000 }, { "epoch": 4.17, "learning_rate": 3.958861812610667e-05, "loss": 3.1516, "step": 663500 }, { "epoch": 4.17, "learning_rate": 3.958076942396806e-05, "loss": 3.1529, "step": 664000 }, { "epoch": 4.17, "learning_rate": 3.957292072182944e-05, "loss": 3.1548, "step": 664500 }, { "epoch": 4.18, "learning_rate": 3.956507201969082e-05, "loss": 3.1529, "step": 665000 }, { "epoch": 4.18, "learning_rate": 3.955722331755221e-05, "loss": 3.1469, "step": 665500 }, { "epoch": 4.18, "learning_rate": 3.954939031281788e-05, "loss": 3.1466, "step": 666000 }, { "epoch": 4.18, "learning_rate": 3.954154161067926e-05, "loss": 3.1539, "step": 666500 }, { "epoch": 4.19, "learning_rate": 3.953369290854065e-05, "loss": 3.1522, "step": 667000 }, { "epoch": 4.19, "learning_rate": 3.952585990380631e-05, "loss": 3.1553, "step": 667500 }, { "epoch": 4.19, "learning_rate": 3.9518011201667696e-05, "loss": 3.148, "step": 668000 }, { "epoch": 4.2, "learning_rate": 3.951016249952908e-05, "loss": 3.1607, "step": 668500 }, { "epoch": 4.2, "learning_rate": 3.9502313797390466e-05, "loss": 3.148, "step": 669000 }, { "epoch": 4.2, "learning_rate": 3.9494465095251854e-05, "loss": 3.1486, "step": 669500 }, { "epoch": 4.21, "learning_rate": 3.9486616393113235e-05, "loss": 3.1507, "step": 670000 }, { "epoch": 4.21, "learning_rate": 3.947876769097462e-05, "loss": 3.146, "step": 670500 }, { "epoch": 4.21, "learning_rate": 3.947091898883601e-05, "loss": 3.1459, "step": 671000 }, { "epoch": 4.22, "learning_rate": 3.946307028669739e-05, "loss": 3.1489, "step": 671500 }, { "epoch": 4.22, "learning_rate": 3.9455221584558774e-05, "loss": 3.1494, "step": 672000 }, { "epoch": 4.22, "learning_rate": 3.944737288242016e-05, "loss": 3.1563, "step": 672500 }, { "epoch": 4.23, "learning_rate": 3.943953987768583e-05, "loss": 3.1457, "step": 673000 }, { "epoch": 4.23, "learning_rate": 3.943169117554721e-05, "loss": 3.1483, "step": 673500 }, { "epoch": 4.23, "learning_rate": 3.94238424734086e-05, "loss": 3.1518, "step": 674000 }, { "epoch": 4.24, "learning_rate": 3.941600946867426e-05, "loss": 3.1423, "step": 674500 }, { "epoch": 4.24, "learning_rate": 3.940816076653564e-05, "loss": 3.1516, "step": 675000 }, { "epoch": 4.24, "learning_rate": 3.940031206439703e-05, "loss": 3.1526, "step": 675500 }, { "epoch": 4.24, "learning_rate": 3.939246336225842e-05, "loss": 3.1424, "step": 676000 }, { "epoch": 4.25, "learning_rate": 3.938463035752408e-05, "loss": 3.1484, "step": 676500 }, { "epoch": 4.25, "learning_rate": 3.937678165538547e-05, "loss": 3.1469, "step": 677000 }, { "epoch": 4.25, "learning_rate": 3.936893295324685e-05, "loss": 3.154, "step": 677500 }, { "epoch": 4.26, "learning_rate": 3.936108425110824e-05, "loss": 3.1498, "step": 678000 }, { "epoch": 4.26, "learning_rate": 3.9353235548969626e-05, "loss": 3.1517, "step": 678500 }, { "epoch": 4.26, "learning_rate": 3.9345386846831014e-05, "loss": 3.1532, "step": 679000 }, { "epoch": 4.27, "learning_rate": 3.9337538144692395e-05, "loss": 3.1495, "step": 679500 }, { "epoch": 4.27, "learning_rate": 3.932968944255378e-05, "loss": 3.1519, "step": 680000 }, { "epoch": 4.27, "learning_rate": 3.9321840740415165e-05, "loss": 3.1516, "step": 680500 }, { "epoch": 4.28, "learning_rate": 3.931399203827655e-05, "loss": 3.1487, "step": 681000 }, { "epoch": 4.28, "learning_rate": 3.930614333613794e-05, "loss": 3.1427, "step": 681500 }, { "epoch": 4.28, "learning_rate": 3.929829463399932e-05, "loss": 3.1437, "step": 682000 }, { "epoch": 4.29, "learning_rate": 3.9290477326669264e-05, "loss": 3.1523, "step": 682500 }, { "epoch": 4.29, "learning_rate": 3.9282628624530645e-05, "loss": 3.156, "step": 683000 }, { "epoch": 4.29, "learning_rate": 3.927477992239203e-05, "loss": 3.1502, "step": 683500 }, { "epoch": 4.29, "learning_rate": 3.926693122025342e-05, "loss": 3.1475, "step": 684000 }, { "epoch": 4.3, "learning_rate": 3.925909821551908e-05, "loss": 3.1488, "step": 684500 }, { "epoch": 4.3, "learning_rate": 3.925126521078475e-05, "loss": 3.1527, "step": 685000 }, { "epoch": 4.3, "learning_rate": 3.9243432206050406e-05, "loss": 3.1494, "step": 685500 }, { "epoch": 4.31, "learning_rate": 3.923559920131607e-05, "loss": 3.1498, "step": 686000 }, { "epoch": 4.31, "learning_rate": 3.9227750499177455e-05, "loss": 3.1497, "step": 686500 }, { "epoch": 4.31, "learning_rate": 3.921990179703884e-05, "loss": 3.1449, "step": 687000 }, { "epoch": 4.32, "learning_rate": 3.921205309490023e-05, "loss": 3.1465, "step": 687500 }, { "epoch": 4.32, "learning_rate": 3.920420439276161e-05, "loss": 3.1524, "step": 688000 }, { "epoch": 4.32, "learning_rate": 3.9196355690623e-05, "loss": 3.1589, "step": 688500 }, { "epoch": 4.33, "learning_rate": 3.918850698848439e-05, "loss": 3.144, "step": 689000 }, { "epoch": 4.33, "learning_rate": 3.918065828634577e-05, "loss": 3.1499, "step": 689500 }, { "epoch": 4.33, "learning_rate": 3.917280958420716e-05, "loss": 3.146, "step": 690000 }, { "epoch": 4.33, "eval_accuracy": 0.43961423389299953, "eval_loss": 3.0692529678344727, "eval_runtime": 1449.6191, "eval_samples_per_second": 57.809, "eval_steps_per_second": 5.782, "step": 690000 }, { "epoch": 4.34, "learning_rate": 3.916496088206854e-05, "loss": 3.1472, "step": 690500 }, { "epoch": 4.34, "learning_rate": 3.915711217992993e-05, "loss": 3.1517, "step": 691000 }, { "epoch": 4.34, "learning_rate": 3.9149263477791316e-05, "loss": 3.1464, "step": 691500 }, { "epoch": 4.35, "learning_rate": 3.9141414775652704e-05, "loss": 3.145, "step": 692000 }, { "epoch": 4.35, "learning_rate": 3.9133566073514086e-05, "loss": 3.1516, "step": 692500 }, { "epoch": 4.35, "learning_rate": 3.912571737137547e-05, "loss": 3.1441, "step": 693000 }, { "epoch": 4.35, "learning_rate": 3.9117868669236855e-05, "loss": 3.147, "step": 693500 }, { "epoch": 4.36, "learning_rate": 3.911001996709824e-05, "loss": 3.1564, "step": 694000 }, { "epoch": 4.36, "learning_rate": 3.910217126495963e-05, "loss": 3.1494, "step": 694500 }, { "epoch": 4.36, "learning_rate": 3.909432256282101e-05, "loss": 3.1436, "step": 695000 }, { "epoch": 4.37, "learning_rate": 3.90864738606824e-05, "loss": 3.1513, "step": 695500 }, { "epoch": 4.37, "learning_rate": 3.907862515854379e-05, "loss": 3.1441, "step": 696000 }, { "epoch": 4.37, "learning_rate": 3.907079215380945e-05, "loss": 3.1546, "step": 696500 }, { "epoch": 4.38, "learning_rate": 3.906294345167084e-05, "loss": 3.1517, "step": 697000 }, { "epoch": 4.38, "learning_rate": 3.905509474953222e-05, "loss": 3.149, "step": 697500 }, { "epoch": 4.38, "learning_rate": 3.90472460473936e-05, "loss": 3.1483, "step": 698000 }, { "epoch": 4.39, "learning_rate": 3.903941304265927e-05, "loss": 3.144, "step": 698500 }, { "epoch": 4.39, "learning_rate": 3.903156434052066e-05, "loss": 3.1484, "step": 699000 }, { "epoch": 4.39, "learning_rate": 3.902371563838204e-05, "loss": 3.1477, "step": 699500 }, { "epoch": 4.4, "learning_rate": 3.901586693624342e-05, "loss": 3.1436, "step": 700000 }, { "epoch": 4.4, "learning_rate": 3.900801823410481e-05, "loss": 3.1466, "step": 700500 }, { "epoch": 4.4, "learning_rate": 3.900018522937047e-05, "loss": 3.1516, "step": 701000 }, { "epoch": 4.4, "learning_rate": 3.899233652723186e-05, "loss": 3.1532, "step": 701500 }, { "epoch": 4.41, "learning_rate": 3.8984487825093246e-05, "loss": 3.15, "step": 702000 }, { "epoch": 4.41, "learning_rate": 3.897665482035891e-05, "loss": 3.1497, "step": 702500 }, { "epoch": 4.41, "learning_rate": 3.896880611822029e-05, "loss": 3.1485, "step": 703000 }, { "epoch": 4.42, "learning_rate": 3.896095741608168e-05, "loss": 3.1424, "step": 703500 }, { "epoch": 4.42, "learning_rate": 3.8953108713943065e-05, "loss": 3.1499, "step": 704000 }, { "epoch": 4.42, "learning_rate": 3.8945275709208726e-05, "loss": 3.1474, "step": 704500 }, { "epoch": 4.43, "learning_rate": 3.8937427007070114e-05, "loss": 3.1402, "step": 705000 }, { "epoch": 4.43, "learning_rate": 3.8929578304931496e-05, "loss": 3.1469, "step": 705500 }, { "epoch": 4.43, "learning_rate": 3.8921729602792884e-05, "loss": 3.1459, "step": 706000 }, { "epoch": 4.44, "learning_rate": 3.891388090065427e-05, "loss": 3.1464, "step": 706500 }, { "epoch": 4.44, "learning_rate": 3.890603219851566e-05, "loss": 3.1499, "step": 707000 }, { "epoch": 4.44, "learning_rate": 3.889818349637704e-05, "loss": 3.156, "step": 707500 }, { "epoch": 4.45, "learning_rate": 3.889033479423842e-05, "loss": 3.1421, "step": 708000 }, { "epoch": 4.45, "learning_rate": 3.888248609209981e-05, "loss": 3.1437, "step": 708500 }, { "epoch": 4.45, "learning_rate": 3.88746373899612e-05, "loss": 3.1479, "step": 709000 }, { "epoch": 4.45, "learning_rate": 3.886680438522686e-05, "loss": 3.1462, "step": 709500 }, { "epoch": 4.46, "learning_rate": 3.885895568308824e-05, "loss": 3.1435, "step": 710000 }, { "epoch": 4.46, "learning_rate": 3.885110698094963e-05, "loss": 3.1485, "step": 710500 }, { "epoch": 4.46, "learning_rate": 3.884325827881102e-05, "loss": 3.1473, "step": 711000 }, { "epoch": 4.47, "learning_rate": 3.883542527407668e-05, "loss": 3.1461, "step": 711500 }, { "epoch": 4.47, "learning_rate": 3.882757657193807e-05, "loss": 3.1494, "step": 712000 }, { "epoch": 4.47, "learning_rate": 3.881974356720373e-05, "loss": 3.1529, "step": 712500 }, { "epoch": 4.48, "learning_rate": 3.881189486506511e-05, "loss": 3.1406, "step": 713000 }, { "epoch": 4.48, "learning_rate": 3.88040461629265e-05, "loss": 3.1541, "step": 713500 }, { "epoch": 4.48, "learning_rate": 3.8796197460787887e-05, "loss": 3.1409, "step": 714000 }, { "epoch": 4.49, "learning_rate": 3.8788348758649275e-05, "loss": 3.1413, "step": 714500 }, { "epoch": 4.49, "learning_rate": 3.8780500056510656e-05, "loss": 3.1494, "step": 715000 }, { "epoch": 4.49, "learning_rate": 3.8772651354372044e-05, "loss": 3.1392, "step": 715500 }, { "epoch": 4.5, "learning_rate": 3.8764802652233425e-05, "loss": 3.1454, "step": 716000 }, { "epoch": 4.5, "learning_rate": 3.8756953950094814e-05, "loss": 3.1466, "step": 716500 }, { "epoch": 4.5, "learning_rate": 3.87491052479562e-05, "loss": 3.1523, "step": 717000 }, { "epoch": 4.51, "learning_rate": 3.874125654581758e-05, "loss": 3.1411, "step": 717500 }, { "epoch": 4.51, "learning_rate": 3.873340784367897e-05, "loss": 3.1438, "step": 718000 }, { "epoch": 4.51, "learning_rate": 3.872555914154036e-05, "loss": 3.1519, "step": 718500 }, { "epoch": 4.51, "learning_rate": 3.871772613680602e-05, "loss": 3.1397, "step": 719000 }, { "epoch": 4.52, "learning_rate": 3.870987743466741e-05, "loss": 3.1415, "step": 719500 }, { "epoch": 4.52, "learning_rate": 3.870202873252879e-05, "loss": 3.1429, "step": 720000 }, { "epoch": 4.52, "eval_accuracy": 0.4404408558874342, "eval_loss": 3.0631744861602783, "eval_runtime": 1450.6574, "eval_samples_per_second": 57.768, "eval_steps_per_second": 5.777, "step": 720000 }, { "epoch": 4.52, "learning_rate": 3.869419572779445e-05, "loss": 3.1485, "step": 720500 }, { "epoch": 4.53, "learning_rate": 3.868634702565584e-05, "loss": 3.1421, "step": 721000 }, { "epoch": 4.53, "learning_rate": 3.867849832351723e-05, "loss": 3.1425, "step": 721500 }, { "epoch": 4.53, "learning_rate": 3.8670649621378616e-05, "loss": 3.1524, "step": 722000 }, { "epoch": 4.54, "learning_rate": 3.866280091924e-05, "loss": 3.1479, "step": 722500 }, { "epoch": 4.54, "learning_rate": 3.865495221710138e-05, "loss": 3.1429, "step": 723000 }, { "epoch": 4.54, "learning_rate": 3.864710351496277e-05, "loss": 3.1486, "step": 723500 }, { "epoch": 4.55, "learning_rate": 3.8639254812824155e-05, "loss": 3.1495, "step": 724000 }, { "epoch": 4.55, "learning_rate": 3.8631406110685536e-05, "loss": 3.1492, "step": 724500 }, { "epoch": 4.55, "learning_rate": 3.8623557408546924e-05, "loss": 3.1418, "step": 725000 }, { "epoch": 4.56, "learning_rate": 3.8615724403812586e-05, "loss": 3.1525, "step": 725500 }, { "epoch": 4.56, "learning_rate": 3.8607875701673974e-05, "loss": 3.1441, "step": 726000 }, { "epoch": 4.56, "learning_rate": 3.860002699953536e-05, "loss": 3.1469, "step": 726500 }, { "epoch": 4.56, "learning_rate": 3.8592193994801023e-05, "loss": 3.1436, "step": 727000 }, { "epoch": 4.57, "learning_rate": 3.8584345292662405e-05, "loss": 3.1486, "step": 727500 }, { "epoch": 4.57, "learning_rate": 3.857649659052379e-05, "loss": 3.1502, "step": 728000 }, { "epoch": 4.57, "learning_rate": 3.856864788838518e-05, "loss": 3.1473, "step": 728500 }, { "epoch": 4.58, "learning_rate": 3.856079918624656e-05, "loss": 3.1456, "step": 729000 }, { "epoch": 4.58, "learning_rate": 3.855296618151223e-05, "loss": 3.1439, "step": 729500 }, { "epoch": 4.58, "learning_rate": 3.854511747937361e-05, "loss": 3.1447, "step": 730000 }, { "epoch": 4.59, "learning_rate": 3.8537268777235e-05, "loss": 3.1404, "step": 730500 }, { "epoch": 4.59, "learning_rate": 3.852942007509638e-05, "loss": 3.1437, "step": 731000 }, { "epoch": 4.59, "learning_rate": 3.852157137295777e-05, "loss": 3.1473, "step": 731500 }, { "epoch": 4.6, "learning_rate": 3.851372267081916e-05, "loss": 3.1378, "step": 732000 }, { "epoch": 4.6, "learning_rate": 3.850587396868054e-05, "loss": 3.1449, "step": 732500 }, { "epoch": 4.6, "learning_rate": 3.849802526654193e-05, "loss": 3.1412, "step": 733000 }, { "epoch": 4.61, "learning_rate": 3.8490176564403315e-05, "loss": 3.1418, "step": 733500 }, { "epoch": 4.61, "learning_rate": 3.8482327862264697e-05, "loss": 3.1432, "step": 734000 }, { "epoch": 4.61, "learning_rate": 3.847447916012608e-05, "loss": 3.149, "step": 734500 }, { "epoch": 4.62, "learning_rate": 3.8466630457987466e-05, "loss": 3.1461, "step": 735000 }, { "epoch": 4.62, "learning_rate": 3.8458797453253134e-05, "loss": 3.1474, "step": 735500 }, { "epoch": 4.62, "learning_rate": 3.8450948751114516e-05, "loss": 3.1508, "step": 736000 }, { "epoch": 4.62, "learning_rate": 3.8443100048975904e-05, "loss": 3.1461, "step": 736500 }, { "epoch": 4.63, "learning_rate": 3.8435251346837285e-05, "loss": 3.1405, "step": 737000 }, { "epoch": 4.63, "learning_rate": 3.842740264469867e-05, "loss": 3.145, "step": 737500 }, { "epoch": 4.63, "learning_rate": 3.8419585337368615e-05, "loss": 3.1383, "step": 738000 }, { "epoch": 4.64, "learning_rate": 3.841173663523e-05, "loss": 3.1457, "step": 738500 }, { "epoch": 4.64, "learning_rate": 3.8403887933091384e-05, "loss": 3.1482, "step": 739000 }, { "epoch": 4.64, "learning_rate": 3.839603923095277e-05, "loss": 3.146, "step": 739500 }, { "epoch": 4.65, "learning_rate": 3.8388190528814154e-05, "loss": 3.1354, "step": 740000 }, { "epoch": 4.65, "learning_rate": 3.838035752407982e-05, "loss": 3.1428, "step": 740500 }, { "epoch": 4.65, "learning_rate": 3.83725088219412e-05, "loss": 3.1412, "step": 741000 }, { "epoch": 4.66, "learning_rate": 3.836467581720687e-05, "loss": 3.1501, "step": 741500 }, { "epoch": 4.66, "learning_rate": 3.835682711506825e-05, "loss": 3.1403, "step": 742000 }, { "epoch": 4.66, "learning_rate": 3.834897841292964e-05, "loss": 3.1436, "step": 742500 }, { "epoch": 4.67, "learning_rate": 3.834112971079102e-05, "loss": 3.1444, "step": 743000 }, { "epoch": 4.67, "learning_rate": 3.833329670605669e-05, "loss": 3.1436, "step": 743500 }, { "epoch": 4.67, "learning_rate": 3.832544800391807e-05, "loss": 3.1507, "step": 744000 }, { "epoch": 4.67, "learning_rate": 3.831759930177946e-05, "loss": 3.1454, "step": 744500 }, { "epoch": 4.68, "learning_rate": 3.830975059964085e-05, "loss": 3.1416, "step": 745000 }, { "epoch": 4.68, "learning_rate": 3.830190189750223e-05, "loss": 3.1392, "step": 745500 }, { "epoch": 4.68, "learning_rate": 3.829405319536362e-05, "loss": 3.137, "step": 746000 }, { "epoch": 4.69, "learning_rate": 3.8286204493225005e-05, "loss": 3.141, "step": 746500 }, { "epoch": 4.69, "learning_rate": 3.827835579108639e-05, "loss": 3.14, "step": 747000 }, { "epoch": 4.69, "learning_rate": 3.827050708894777e-05, "loss": 3.1512, "step": 747500 }, { "epoch": 4.7, "learning_rate": 3.8262658386809156e-05, "loss": 3.1402, "step": 748000 }, { "epoch": 4.7, "learning_rate": 3.8254809684670544e-05, "loss": 3.145, "step": 748500 }, { "epoch": 4.7, "learning_rate": 3.824696098253193e-05, "loss": 3.1406, "step": 749000 }, { "epoch": 4.71, "learning_rate": 3.8239112280393314e-05, "loss": 3.1441, "step": 749500 }, { "epoch": 4.71, "learning_rate": 3.8231279275658975e-05, "loss": 3.1399, "step": 750000 }, { "epoch": 4.71, "eval_accuracy": 0.4410138863746508, "eval_loss": 3.0572471618652344, "eval_runtime": 1450.5672, "eval_samples_per_second": 57.771, "eval_steps_per_second": 5.778, "step": 750000 }, { "epoch": 4.71, "learning_rate": 3.822343057352036e-05, "loss": 3.1487, "step": 750500 }, { "epoch": 4.72, "learning_rate": 3.821558187138175e-05, "loss": 3.141, "step": 751000 }, { "epoch": 4.72, "learning_rate": 3.820773316924314e-05, "loss": 3.1447, "step": 751500 }, { "epoch": 4.72, "learning_rate": 3.81999001645088e-05, "loss": 3.1452, "step": 752000 }, { "epoch": 4.72, "learning_rate": 3.819205146237018e-05, "loss": 3.1428, "step": 752500 }, { "epoch": 4.73, "learning_rate": 3.818420276023157e-05, "loss": 3.1377, "step": 753000 }, { "epoch": 4.73, "learning_rate": 3.817635405809296e-05, "loss": 3.1444, "step": 753500 }, { "epoch": 4.73, "learning_rate": 3.816850535595434e-05, "loss": 3.1456, "step": 754000 }, { "epoch": 4.74, "learning_rate": 3.816065665381573e-05, "loss": 3.141, "step": 754500 }, { "epoch": 4.74, "learning_rate": 3.815280795167711e-05, "loss": 3.1406, "step": 755000 }, { "epoch": 4.74, "learning_rate": 3.81449592495385e-05, "loss": 3.1416, "step": 755500 }, { "epoch": 4.75, "learning_rate": 3.813712624480416e-05, "loss": 3.1447, "step": 756000 }, { "epoch": 4.75, "learning_rate": 3.812927754266555e-05, "loss": 3.1443, "step": 756500 }, { "epoch": 4.75, "learning_rate": 3.8121428840526935e-05, "loss": 3.1443, "step": 757000 }, { "epoch": 4.76, "learning_rate": 3.8113580138388317e-05, "loss": 3.1446, "step": 757500 }, { "epoch": 4.76, "learning_rate": 3.8105731436249705e-05, "loss": 3.1427, "step": 758000 }, { "epoch": 4.76, "learning_rate": 3.8097914128919646e-05, "loss": 3.147, "step": 758500 }, { "epoch": 4.77, "learning_rate": 3.809006542678103e-05, "loss": 3.1502, "step": 759000 }, { "epoch": 4.77, "learning_rate": 3.8082216724642416e-05, "loss": 3.1395, "step": 759500 }, { "epoch": 4.77, "learning_rate": 3.80743680225038e-05, "loss": 3.1405, "step": 760000 }, { "epoch": 4.78, "learning_rate": 3.806653501776946e-05, "loss": 3.1337, "step": 760500 }, { "epoch": 4.78, "learning_rate": 3.8058686315630846e-05, "loss": 3.1373, "step": 761000 }, { "epoch": 4.78, "learning_rate": 3.8050837613492235e-05, "loss": 3.1359, "step": 761500 }, { "epoch": 4.78, "learning_rate": 3.804298891135362e-05, "loss": 3.1408, "step": 762000 }, { "epoch": 4.79, "learning_rate": 3.8035140209215004e-05, "loss": 3.1382, "step": 762500 }, { "epoch": 4.79, "learning_rate": 3.802729150707639e-05, "loss": 3.1426, "step": 763000 }, { "epoch": 4.79, "learning_rate": 3.801944280493778e-05, "loss": 3.1338, "step": 763500 }, { "epoch": 4.8, "learning_rate": 3.801159410279916e-05, "loss": 3.143, "step": 764000 }, { "epoch": 4.8, "learning_rate": 3.800374540066055e-05, "loss": 3.1411, "step": 764500 }, { "epoch": 4.8, "learning_rate": 3.799591239592621e-05, "loss": 3.1434, "step": 765000 }, { "epoch": 4.81, "learning_rate": 3.798806369378759e-05, "loss": 3.1377, "step": 765500 }, { "epoch": 4.81, "learning_rate": 3.798021499164898e-05, "loss": 3.1428, "step": 766000 }, { "epoch": 4.81, "learning_rate": 3.797236628951037e-05, "loss": 3.1425, "step": 766500 }, { "epoch": 4.82, "learning_rate": 3.796451758737176e-05, "loss": 3.1447, "step": 767000 }, { "epoch": 4.82, "learning_rate": 3.795668458263742e-05, "loss": 3.1423, "step": 767500 }, { "epoch": 4.82, "learning_rate": 3.79488358804988e-05, "loss": 3.1437, "step": 768000 }, { "epoch": 4.83, "learning_rate": 3.794098717836019e-05, "loss": 3.1401, "step": 768500 }, { "epoch": 4.83, "learning_rate": 3.7933138476221576e-05, "loss": 3.1384, "step": 769000 }, { "epoch": 4.83, "learning_rate": 3.792530547148724e-05, "loss": 3.1404, "step": 769500 }, { "epoch": 4.83, "learning_rate": 3.7917456769348625e-05, "loss": 3.1405, "step": 770000 }, { "epoch": 4.84, "learning_rate": 3.790960806721001e-05, "loss": 3.1429, "step": 770500 }, { "epoch": 4.84, "learning_rate": 3.7901759365071395e-05, "loss": 3.1408, "step": 771000 }, { "epoch": 4.84, "learning_rate": 3.789391066293278e-05, "loss": 3.1358, "step": 771500 }, { "epoch": 4.85, "learning_rate": 3.7886061960794164e-05, "loss": 3.1455, "step": 772000 }, { "epoch": 4.85, "learning_rate": 3.7878213258655546e-05, "loss": 3.1442, "step": 772500 }, { "epoch": 4.85, "learning_rate": 3.7870364556516934e-05, "loss": 3.1415, "step": 773000 }, { "epoch": 4.86, "learning_rate": 3.7862531551782595e-05, "loss": 3.1387, "step": 773500 }, { "epoch": 4.86, "learning_rate": 3.785468284964398e-05, "loss": 3.1458, "step": 774000 }, { "epoch": 4.86, "learning_rate": 3.784683414750537e-05, "loss": 3.142, "step": 774500 }, { "epoch": 4.87, "learning_rate": 3.783898544536675e-05, "loss": 3.1427, "step": 775000 }, { "epoch": 4.87, "learning_rate": 3.7831152440632414e-05, "loss": 3.1406, "step": 775500 }, { "epoch": 4.87, "learning_rate": 3.78233037384938e-05, "loss": 3.1321, "step": 776000 }, { "epoch": 4.88, "learning_rate": 3.781545503635519e-05, "loss": 3.1398, "step": 776500 }, { "epoch": 4.88, "learning_rate": 3.780760633421658e-05, "loss": 3.1326, "step": 777000 }, { "epoch": 4.88, "learning_rate": 3.779975763207796e-05, "loss": 3.1377, "step": 777500 }, { "epoch": 4.89, "learning_rate": 3.779190892993935e-05, "loss": 3.1338, "step": 778000 }, { "epoch": 4.89, "learning_rate": 3.778406022780073e-05, "loss": 3.1361, "step": 778500 }, { "epoch": 4.89, "learning_rate": 3.777621152566212e-05, "loss": 3.1374, "step": 779000 }, { "epoch": 4.89, "learning_rate": 3.7768378520927786e-05, "loss": 3.1382, "step": 779500 }, { "epoch": 4.9, "learning_rate": 3.776052981878917e-05, "loss": 3.1375, "step": 780000 }, { "epoch": 4.9, "eval_accuracy": 0.4416947224142919, "eval_loss": 3.052121877670288, "eval_runtime": 1450.1296, "eval_samples_per_second": 57.789, "eval_steps_per_second": 5.779, "step": 780000 }, { "epoch": 4.9, "learning_rate": 3.775268111665055e-05, "loss": 3.1391, "step": 780500 }, { "epoch": 4.9, "learning_rate": 3.7744832414511936e-05, "loss": 3.1375, "step": 781000 }, { "epoch": 4.91, "learning_rate": 3.7736983712373325e-05, "loss": 3.1405, "step": 781500 }, { "epoch": 4.91, "learning_rate": 3.772913501023471e-05, "loss": 3.1333, "step": 782000 }, { "epoch": 4.91, "learning_rate": 3.7721286308096094e-05, "loss": 3.147, "step": 782500 }, { "epoch": 4.92, "learning_rate": 3.771343760595748e-05, "loss": 3.1427, "step": 783000 }, { "epoch": 4.92, "learning_rate": 3.7705604601223144e-05, "loss": 3.1408, "step": 783500 }, { "epoch": 4.92, "learning_rate": 3.769775589908453e-05, "loss": 3.1353, "step": 784000 }, { "epoch": 4.93, "learning_rate": 3.768990719694592e-05, "loss": 3.1408, "step": 784500 }, { "epoch": 4.93, "learning_rate": 3.76820584948073e-05, "loss": 3.1324, "step": 785000 }, { "epoch": 4.93, "learning_rate": 3.767422549007296e-05, "loss": 3.1421, "step": 785500 }, { "epoch": 4.94, "learning_rate": 3.766637678793435e-05, "loss": 3.14, "step": 786000 }, { "epoch": 4.94, "learning_rate": 3.765852808579574e-05, "loss": 3.1355, "step": 786500 }, { "epoch": 4.94, "learning_rate": 3.765067938365712e-05, "loss": 3.1375, "step": 787000 }, { "epoch": 4.94, "learning_rate": 3.76428306815185e-05, "loss": 3.1285, "step": 787500 }, { "epoch": 4.95, "learning_rate": 3.763499767678417e-05, "loss": 3.1354, "step": 788000 }, { "epoch": 4.95, "learning_rate": 3.762714897464555e-05, "loss": 3.1348, "step": 788500 }, { "epoch": 4.95, "learning_rate": 3.761930027250694e-05, "loss": 3.1438, "step": 789000 }, { "epoch": 4.96, "learning_rate": 3.761145157036833e-05, "loss": 3.1397, "step": 789500 }, { "epoch": 4.96, "learning_rate": 3.760360286822971e-05, "loss": 3.14, "step": 790000 }, { "epoch": 4.96, "learning_rate": 3.75957541660911e-05, "loss": 3.1316, "step": 790500 }, { "epoch": 4.97, "learning_rate": 3.7587905463952485e-05, "loss": 3.1421, "step": 791000 }, { "epoch": 4.97, "learning_rate": 3.758005676181387e-05, "loss": 3.1407, "step": 791500 }, { "epoch": 4.97, "learning_rate": 3.7572223757079534e-05, "loss": 3.1476, "step": 792000 }, { "epoch": 4.98, "learning_rate": 3.7564375054940916e-05, "loss": 3.1418, "step": 792500 }, { "epoch": 4.98, "learning_rate": 3.7556526352802304e-05, "loss": 3.136, "step": 793000 }, { "epoch": 4.98, "learning_rate": 3.7548677650663685e-05, "loss": 3.1384, "step": 793500 }, { "epoch": 4.99, "learning_rate": 3.754084464592935e-05, "loss": 3.1395, "step": 794000 }, { "epoch": 4.99, "learning_rate": 3.753299594379074e-05, "loss": 3.1336, "step": 794500 }, { "epoch": 4.99, "learning_rate": 3.752514724165212e-05, "loss": 3.1464, "step": 795000 }, { "epoch": 4.99, "learning_rate": 3.7517314236917784e-05, "loss": 3.1371, "step": 795500 }, { "epoch": 5.0, "learning_rate": 3.750946553477917e-05, "loss": 3.1371, "step": 796000 }, { "epoch": 5.0, "learning_rate": 3.7501616832640554e-05, "loss": 3.1228, "step": 796500 }, { "epoch": 5.0, "learning_rate": 3.749376813050194e-05, "loss": 3.1185, "step": 797000 }, { "epoch": 5.01, "learning_rate": 3.748591942836332e-05, "loss": 3.1104, "step": 797500 }, { "epoch": 5.01, "learning_rate": 3.747807072622471e-05, "loss": 3.1111, "step": 798000 }, { "epoch": 5.01, "learning_rate": 3.747023772149037e-05, "loss": 3.111, "step": 798500 }, { "epoch": 5.02, "learning_rate": 3.746238901935176e-05, "loss": 3.1153, "step": 799000 }, { "epoch": 5.02, "learning_rate": 3.745454031721315e-05, "loss": 3.1206, "step": 799500 }, { "epoch": 5.02, "learning_rate": 3.744670731247881e-05, "loss": 3.123, "step": 800000 }, { "epoch": 5.03, "learning_rate": 3.743885861034019e-05, "loss": 3.1197, "step": 800500 }, { "epoch": 5.03, "learning_rate": 3.743100990820158e-05, "loss": 3.1144, "step": 801000 }, { "epoch": 5.03, "learning_rate": 3.742316120606297e-05, "loss": 3.114, "step": 801500 }, { "epoch": 5.04, "learning_rate": 3.7415312503924356e-05, "loss": 3.1203, "step": 802000 }, { "epoch": 5.04, "learning_rate": 3.740746380178574e-05, "loss": 3.1202, "step": 802500 }, { "epoch": 5.04, "learning_rate": 3.7399615099647126e-05, "loss": 3.112, "step": 803000 }, { "epoch": 5.05, "learning_rate": 3.739176639750851e-05, "loss": 3.1109, "step": 803500 }, { "epoch": 5.05, "learning_rate": 3.7383917695369895e-05, "loss": 3.1239, "step": 804000 }, { "epoch": 5.05, "learning_rate": 3.737606899323128e-05, "loss": 3.1173, "step": 804500 }, { "epoch": 5.05, "learning_rate": 3.7368220291092665e-05, "loss": 3.1137, "step": 805000 }, { "epoch": 5.06, "learning_rate": 3.736037158895405e-05, "loss": 3.1156, "step": 805500 }, { "epoch": 5.06, "learning_rate": 3.735252288681544e-05, "loss": 3.115, "step": 806000 }, { "epoch": 5.06, "learning_rate": 3.73446898820811e-05, "loss": 3.1225, "step": 806500 }, { "epoch": 5.07, "learning_rate": 3.733684117994249e-05, "loss": 3.1208, "step": 807000 }, { "epoch": 5.07, "learning_rate": 3.732899247780387e-05, "loss": 3.1208, "step": 807500 }, { "epoch": 5.07, "learning_rate": 3.732114377566526e-05, "loss": 3.12, "step": 808000 }, { "epoch": 5.08, "learning_rate": 3.731331077093092e-05, "loss": 3.1213, "step": 808500 }, { "epoch": 5.08, "learning_rate": 3.730546206879231e-05, "loss": 3.1234, "step": 809000 }, { "epoch": 5.08, "learning_rate": 3.7297644761462244e-05, "loss": 3.126, "step": 809500 }, { "epoch": 5.09, "learning_rate": 3.728979605932363e-05, "loss": 3.1232, "step": 810000 }, { "epoch": 5.09, "eval_accuracy": 0.44245848311008823, "eval_loss": 3.047654867172241, "eval_runtime": 1452.7812, "eval_samples_per_second": 57.683, "eval_steps_per_second": 5.769, "step": 810000 }, { "epoch": 5.09, "learning_rate": 3.728194735718501e-05, "loss": 3.1187, "step": 810500 }, { "epoch": 5.09, "learning_rate": 3.72740986550464e-05, "loss": 3.1156, "step": 811000 }, { "epoch": 5.1, "learning_rate": 3.726624995290779e-05, "loss": 3.119, "step": 811500 }, { "epoch": 5.1, "learning_rate": 3.725840125076918e-05, "loss": 3.1169, "step": 812000 }, { "epoch": 5.1, "learning_rate": 3.7250552548630566e-05, "loss": 3.1208, "step": 812500 }, { "epoch": 5.1, "learning_rate": 3.724270384649195e-05, "loss": 3.1191, "step": 813000 }, { "epoch": 5.11, "learning_rate": 3.723485514435333e-05, "loss": 3.1272, "step": 813500 }, { "epoch": 5.11, "learning_rate": 3.722700644221472e-05, "loss": 3.1141, "step": 814000 }, { "epoch": 5.11, "learning_rate": 3.7219157740076105e-05, "loss": 3.1242, "step": 814500 }, { "epoch": 5.12, "learning_rate": 3.7211309037937486e-05, "loss": 3.1238, "step": 815000 }, { "epoch": 5.12, "learning_rate": 3.7203460335798874e-05, "loss": 3.1212, "step": 815500 }, { "epoch": 5.12, "learning_rate": 3.7195627331064536e-05, "loss": 3.1188, "step": 816000 }, { "epoch": 5.13, "learning_rate": 3.7187778628925924e-05, "loss": 3.116, "step": 816500 }, { "epoch": 5.13, "learning_rate": 3.717992992678731e-05, "loss": 3.1306, "step": 817000 }, { "epoch": 5.13, "learning_rate": 3.717208122464869e-05, "loss": 3.1274, "step": 817500 }, { "epoch": 5.14, "learning_rate": 3.7164248219914355e-05, "loss": 3.1219, "step": 818000 }, { "epoch": 5.14, "learning_rate": 3.715639951777574e-05, "loss": 3.121, "step": 818500 }, { "epoch": 5.14, "learning_rate": 3.714855081563713e-05, "loss": 3.1167, "step": 819000 }, { "epoch": 5.15, "learning_rate": 3.714070211349851e-05, "loss": 3.1259, "step": 819500 }, { "epoch": 5.15, "learning_rate": 3.713286910876418e-05, "loss": 3.1226, "step": 820000 }, { "epoch": 5.15, "learning_rate": 3.712502040662556e-05, "loss": 3.1229, "step": 820500 }, { "epoch": 5.16, "learning_rate": 3.711717170448695e-05, "loss": 3.1212, "step": 821000 }, { "epoch": 5.16, "learning_rate": 3.710933869975261e-05, "loss": 3.1187, "step": 821500 }, { "epoch": 5.16, "learning_rate": 3.7101489997614e-05, "loss": 3.1256, "step": 822000 }, { "epoch": 5.16, "learning_rate": 3.709364129547539e-05, "loss": 3.122, "step": 822500 }, { "epoch": 5.17, "learning_rate": 3.708579259333677e-05, "loss": 3.1265, "step": 823000 }, { "epoch": 5.17, "learning_rate": 3.707794389119815e-05, "loss": 3.1224, "step": 823500 }, { "epoch": 5.17, "learning_rate": 3.707009518905954e-05, "loss": 3.125, "step": 824000 }, { "epoch": 5.18, "learning_rate": 3.7062246486920927e-05, "loss": 3.1192, "step": 824500 }, { "epoch": 5.18, "learning_rate": 3.705439778478231e-05, "loss": 3.1209, "step": 825000 }, { "epoch": 5.18, "learning_rate": 3.7046549082643696e-05, "loss": 3.1209, "step": 825500 }, { "epoch": 5.19, "learning_rate": 3.703871607790936e-05, "loss": 3.1274, "step": 826000 }, { "epoch": 5.19, "learning_rate": 3.7030867375770745e-05, "loss": 3.1258, "step": 826500 }, { "epoch": 5.19, "learning_rate": 3.702303437103641e-05, "loss": 3.1182, "step": 827000 }, { "epoch": 5.2, "learning_rate": 3.7015185668897795e-05, "loss": 3.1229, "step": 827500 }, { "epoch": 5.2, "learning_rate": 3.7007352664163456e-05, "loss": 3.1256, "step": 828000 }, { "epoch": 5.2, "learning_rate": 3.699950396202484e-05, "loss": 3.1197, "step": 828500 }, { "epoch": 5.21, "learning_rate": 3.6991655259886226e-05, "loss": 3.1215, "step": 829000 }, { "epoch": 5.21, "learning_rate": 3.6983806557747614e-05, "loss": 3.119, "step": 829500 }, { "epoch": 5.21, "learning_rate": 3.6975957855609e-05, "loss": 3.1197, "step": 830000 }, { "epoch": 5.21, "learning_rate": 3.6968109153470383e-05, "loss": 3.1275, "step": 830500 }, { "epoch": 5.22, "learning_rate": 3.696026045133177e-05, "loss": 3.112, "step": 831000 }, { "epoch": 5.22, "learning_rate": 3.695241174919315e-05, "loss": 3.1154, "step": 831500 }, { "epoch": 5.22, "learning_rate": 3.694457874445882e-05, "loss": 3.131, "step": 832000 }, { "epoch": 5.23, "learning_rate": 3.69367300423202e-05, "loss": 3.1219, "step": 832500 }, { "epoch": 5.23, "learning_rate": 3.6928881340181584e-05, "loss": 3.1148, "step": 833000 }, { "epoch": 5.23, "learning_rate": 3.692103263804297e-05, "loss": 3.1223, "step": 833500 }, { "epoch": 5.24, "learning_rate": 3.691318393590436e-05, "loss": 3.1204, "step": 834000 }, { "epoch": 5.24, "learning_rate": 3.690533523376575e-05, "loss": 3.1246, "step": 834500 }, { "epoch": 5.24, "learning_rate": 3.6897486531627136e-05, "loss": 3.119, "step": 835000 }, { "epoch": 5.25, "learning_rate": 3.688963782948852e-05, "loss": 3.1212, "step": 835500 }, { "epoch": 5.25, "learning_rate": 3.6881789127349906e-05, "loss": 3.1239, "step": 836000 }, { "epoch": 5.25, "learning_rate": 3.687395612261557e-05, "loss": 3.1208, "step": 836500 }, { "epoch": 5.26, "learning_rate": 3.6866107420476955e-05, "loss": 3.1221, "step": 837000 }, { "epoch": 5.26, "learning_rate": 3.685825871833834e-05, "loss": 3.1274, "step": 837500 }, { "epoch": 5.26, "learning_rate": 3.685041001619972e-05, "loss": 3.1234, "step": 838000 }, { "epoch": 5.26, "learning_rate": 3.6842561314061106e-05, "loss": 3.1279, "step": 838500 }, { "epoch": 5.27, "learning_rate": 3.6834728309326774e-05, "loss": 3.1242, "step": 839000 }, { "epoch": 5.27, "learning_rate": 3.6826879607188156e-05, "loss": 3.1191, "step": 839500 }, { "epoch": 5.27, "learning_rate": 3.6819030905049544e-05, "loss": 3.128, "step": 840000 }, { "epoch": 5.27, "eval_accuracy": 0.44285846713872246, "eval_loss": 3.0442583560943604, "eval_runtime": 1444.8872, "eval_samples_per_second": 57.998, "eval_steps_per_second": 5.8, "step": 840000 }, { "epoch": 5.28, "learning_rate": 3.6811182202910925e-05, "loss": 3.125, "step": 840500 }, { "epoch": 5.28, "learning_rate": 3.680333350077231e-05, "loss": 3.126, "step": 841000 }, { "epoch": 5.28, "learning_rate": 3.6795500496037975e-05, "loss": 3.1248, "step": 841500 }, { "epoch": 5.29, "learning_rate": 3.678765179389936e-05, "loss": 3.125, "step": 842000 }, { "epoch": 5.29, "learning_rate": 3.677980309176075e-05, "loss": 3.1288, "step": 842500 }, { "epoch": 5.29, "learning_rate": 3.677195438962213e-05, "loss": 3.124, "step": 843000 }, { "epoch": 5.3, "learning_rate": 3.6764121384887794e-05, "loss": 3.1226, "step": 843500 }, { "epoch": 5.3, "learning_rate": 3.675627268274918e-05, "loss": 3.1221, "step": 844000 }, { "epoch": 5.3, "learning_rate": 3.674842398061057e-05, "loss": 3.1127, "step": 844500 }, { "epoch": 5.31, "learning_rate": 3.674057527847196e-05, "loss": 3.1228, "step": 845000 }, { "epoch": 5.31, "learning_rate": 3.673272657633334e-05, "loss": 3.1212, "step": 845500 }, { "epoch": 5.31, "learning_rate": 3.672487787419473e-05, "loss": 3.1237, "step": 846000 }, { "epoch": 5.32, "learning_rate": 3.671702917205611e-05, "loss": 3.1207, "step": 846500 }, { "epoch": 5.32, "learning_rate": 3.670919616732178e-05, "loss": 3.1235, "step": 847000 }, { "epoch": 5.32, "learning_rate": 3.670136316258744e-05, "loss": 3.1283, "step": 847500 }, { "epoch": 5.32, "learning_rate": 3.66935301578531e-05, "loss": 3.1219, "step": 848000 }, { "epoch": 5.33, "learning_rate": 3.668568145571448e-05, "loss": 3.1224, "step": 848500 }, { "epoch": 5.33, "learning_rate": 3.667783275357587e-05, "loss": 3.1188, "step": 849000 }, { "epoch": 5.33, "learning_rate": 3.666998405143726e-05, "loss": 3.1215, "step": 849500 }, { "epoch": 5.34, "learning_rate": 3.6662135349298645e-05, "loss": 3.1214, "step": 850000 }, { "epoch": 5.34, "learning_rate": 3.665428664716003e-05, "loss": 3.1173, "step": 850500 }, { "epoch": 5.34, "learning_rate": 3.664643794502141e-05, "loss": 3.1245, "step": 851000 }, { "epoch": 5.35, "learning_rate": 3.6638589242882796e-05, "loss": 3.1238, "step": 851500 }, { "epoch": 5.35, "learning_rate": 3.6630740540744184e-05, "loss": 3.1283, "step": 852000 }, { "epoch": 5.35, "learning_rate": 3.662289183860557e-05, "loss": 3.1233, "step": 852500 }, { "epoch": 5.36, "learning_rate": 3.6615043136466954e-05, "loss": 3.1174, "step": 853000 }, { "epoch": 5.36, "learning_rate": 3.660719443432834e-05, "loss": 3.1234, "step": 853500 }, { "epoch": 5.36, "learning_rate": 3.659934573218973e-05, "loss": 3.1216, "step": 854000 }, { "epoch": 5.37, "learning_rate": 3.659151272745539e-05, "loss": 3.128, "step": 854500 }, { "epoch": 5.37, "learning_rate": 3.658367972272105e-05, "loss": 3.128, "step": 855000 }, { "epoch": 5.37, "learning_rate": 3.657583102058244e-05, "loss": 3.1231, "step": 855500 }, { "epoch": 5.37, "learning_rate": 3.656798231844382e-05, "loss": 3.1218, "step": 856000 }, { "epoch": 5.38, "learning_rate": 3.656013361630521e-05, "loss": 3.1217, "step": 856500 }, { "epoch": 5.38, "learning_rate": 3.655230061157087e-05, "loss": 3.1216, "step": 857000 }, { "epoch": 5.38, "learning_rate": 3.654445190943226e-05, "loss": 3.1224, "step": 857500 }, { "epoch": 5.39, "learning_rate": 3.653660320729365e-05, "loss": 3.117, "step": 858000 }, { "epoch": 5.39, "learning_rate": 3.652875450515503e-05, "loss": 3.1182, "step": 858500 }, { "epoch": 5.39, "learning_rate": 3.652090580301641e-05, "loss": 3.1253, "step": 859000 }, { "epoch": 5.4, "learning_rate": 3.65130571008778e-05, "loss": 3.1134, "step": 859500 }, { "epoch": 5.4, "learning_rate": 3.650520839873919e-05, "loss": 3.1198, "step": 860000 }, { "epoch": 5.4, "learning_rate": 3.649735969660057e-05, "loss": 3.1241, "step": 860500 }, { "epoch": 5.41, "learning_rate": 3.648952669186623e-05, "loss": 3.1307, "step": 861000 }, { "epoch": 5.41, "learning_rate": 3.64816936871319e-05, "loss": 3.123, "step": 861500 }, { "epoch": 5.41, "learning_rate": 3.6473844984993286e-05, "loss": 3.119, "step": 862000 }, { "epoch": 5.42, "learning_rate": 3.646599628285467e-05, "loss": 3.1179, "step": 862500 }, { "epoch": 5.42, "learning_rate": 3.6458147580716056e-05, "loss": 3.1228, "step": 863000 }, { "epoch": 5.42, "learning_rate": 3.645029887857744e-05, "loss": 3.1218, "step": 863500 }, { "epoch": 5.43, "learning_rate": 3.6442450176438825e-05, "loss": 3.1228, "step": 864000 }, { "epoch": 5.43, "learning_rate": 3.643460147430021e-05, "loss": 3.1279, "step": 864500 }, { "epoch": 5.43, "learning_rate": 3.64267527721616e-05, "loss": 3.124, "step": 865000 }, { "epoch": 5.43, "learning_rate": 3.641890407002298e-05, "loss": 3.124, "step": 865500 }, { "epoch": 5.44, "learning_rate": 3.6411055367884364e-05, "loss": 3.1252, "step": 866000 }, { "epoch": 5.44, "learning_rate": 3.640320666574575e-05, "loss": 3.1124, "step": 866500 }, { "epoch": 5.44, "learning_rate": 3.639535796360714e-05, "loss": 3.1218, "step": 867000 }, { "epoch": 5.45, "learning_rate": 3.63875249588728e-05, "loss": 3.1248, "step": 867500 }, { "epoch": 5.45, "learning_rate": 3.637969195413847e-05, "loss": 3.1213, "step": 868000 }, { "epoch": 5.45, "learning_rate": 3.637184325199985e-05, "loss": 3.1277, "step": 868500 }, { "epoch": 5.46, "learning_rate": 3.636399454986123e-05, "loss": 3.1196, "step": 869000 }, { "epoch": 5.46, "learning_rate": 3.635614584772262e-05, "loss": 3.1221, "step": 869500 }, { "epoch": 5.46, "learning_rate": 3.634829714558401e-05, "loss": 3.1241, "step": 870000 }, { "epoch": 5.46, "eval_accuracy": 0.4433893295809256, "eval_loss": 3.039578437805176, "eval_runtime": 1450.0695, "eval_samples_per_second": 57.791, "eval_steps_per_second": 5.78, "step": 870000 }, { "epoch": 5.47, "learning_rate": 3.63404484434454e-05, "loss": 3.1186, "step": 870500 }, { "epoch": 5.47, "learning_rate": 3.633259974130678e-05, "loss": 3.1203, "step": 871000 }, { "epoch": 5.47, "learning_rate": 3.6324751039168166e-05, "loss": 3.1213, "step": 871500 }, { "epoch": 5.48, "learning_rate": 3.631691803443383e-05, "loss": 3.1174, "step": 872000 }, { "epoch": 5.48, "learning_rate": 3.630908502969949e-05, "loss": 3.1189, "step": 872500 }, { "epoch": 5.48, "learning_rate": 3.630123632756088e-05, "loss": 3.1203, "step": 873000 }, { "epoch": 5.48, "learning_rate": 3.629338762542226e-05, "loss": 3.1204, "step": 873500 }, { "epoch": 5.49, "learning_rate": 3.628553892328365e-05, "loss": 3.1199, "step": 874000 }, { "epoch": 5.49, "learning_rate": 3.6277690221145035e-05, "loss": 3.125, "step": 874500 }, { "epoch": 5.49, "learning_rate": 3.626984151900642e-05, "loss": 3.1226, "step": 875000 }, { "epoch": 5.5, "learning_rate": 3.6262008514272084e-05, "loss": 3.1235, "step": 875500 }, { "epoch": 5.5, "learning_rate": 3.6254159812133466e-05, "loss": 3.1183, "step": 876000 }, { "epoch": 5.5, "learning_rate": 3.6246311109994854e-05, "loss": 3.1275, "step": 876500 }, { "epoch": 5.51, "learning_rate": 3.6238462407856235e-05, "loss": 3.1231, "step": 877000 }, { "epoch": 5.51, "learning_rate": 3.623061370571762e-05, "loss": 3.1271, "step": 877500 }, { "epoch": 5.51, "learning_rate": 3.622276500357901e-05, "loss": 3.1163, "step": 878000 }, { "epoch": 5.52, "learning_rate": 3.621491630144039e-05, "loss": 3.1216, "step": 878500 }, { "epoch": 5.52, "learning_rate": 3.620706759930178e-05, "loss": 3.1192, "step": 879000 }, { "epoch": 5.52, "learning_rate": 3.619921889716317e-05, "loss": 3.1238, "step": 879500 }, { "epoch": 5.53, "learning_rate": 3.619138589242883e-05, "loss": 3.1262, "step": 880000 }, { "epoch": 5.53, "learning_rate": 3.618353719029022e-05, "loss": 3.122, "step": 880500 }, { "epoch": 5.53, "learning_rate": 3.61756884881516e-05, "loss": 3.1252, "step": 881000 }, { "epoch": 5.53, "learning_rate": 3.616785548341726e-05, "loss": 3.1181, "step": 881500 }, { "epoch": 5.54, "learning_rate": 3.616000678127865e-05, "loss": 3.1166, "step": 882000 }, { "epoch": 5.54, "learning_rate": 3.615217377654431e-05, "loss": 3.1208, "step": 882500 }, { "epoch": 5.54, "learning_rate": 3.61443250744057e-05, "loss": 3.1171, "step": 883000 }, { "epoch": 5.55, "learning_rate": 3.613647637226709e-05, "loss": 3.1205, "step": 883500 }, { "epoch": 5.55, "learning_rate": 3.612862767012847e-05, "loss": 3.1138, "step": 884000 }, { "epoch": 5.55, "learning_rate": 3.6120778967989857e-05, "loss": 3.1103, "step": 884500 }, { "epoch": 5.56, "learning_rate": 3.6112930265851245e-05, "loss": 3.1185, "step": 885000 }, { "epoch": 5.56, "learning_rate": 3.6105081563712626e-05, "loss": 3.1273, "step": 885500 }, { "epoch": 5.56, "learning_rate": 3.609723286157401e-05, "loss": 3.1191, "step": 886000 }, { "epoch": 5.57, "learning_rate": 3.6089384159435396e-05, "loss": 3.114, "step": 886500 }, { "epoch": 5.57, "learning_rate": 3.6081535457296784e-05, "loss": 3.1182, "step": 887000 }, { "epoch": 5.57, "learning_rate": 3.607368675515817e-05, "loss": 3.1282, "step": 887500 }, { "epoch": 5.58, "learning_rate": 3.606583805301955e-05, "loss": 3.121, "step": 888000 }, { "epoch": 5.58, "learning_rate": 3.605798935088094e-05, "loss": 3.1212, "step": 888500 }, { "epoch": 5.58, "learning_rate": 3.60501563461466e-05, "loss": 3.123, "step": 889000 }, { "epoch": 5.59, "learning_rate": 3.6042323341412264e-05, "loss": 3.1142, "step": 889500 }, { "epoch": 5.59, "learning_rate": 3.603447463927365e-05, "loss": 3.1242, "step": 890000 }, { "epoch": 5.59, "learning_rate": 3.602662593713504e-05, "loss": 3.1188, "step": 890500 }, { "epoch": 5.59, "learning_rate": 3.601877723499642e-05, "loss": 3.1183, "step": 891000 }, { "epoch": 5.6, "learning_rate": 3.601092853285781e-05, "loss": 3.1276, "step": 891500 }, { "epoch": 5.6, "learning_rate": 3.600307983071919e-05, "loss": 3.127, "step": 892000 }, { "epoch": 5.6, "learning_rate": 3.599523112858058e-05, "loss": 3.1173, "step": 892500 }, { "epoch": 5.61, "learning_rate": 3.598738242644197e-05, "loss": 3.1158, "step": 893000 }, { "epoch": 5.61, "learning_rate": 3.597954942170763e-05, "loss": 3.1269, "step": 893500 }, { "epoch": 5.61, "learning_rate": 3.597170071956901e-05, "loss": 3.1163, "step": 894000 }, { "epoch": 5.62, "learning_rate": 3.59638520174304e-05, "loss": 3.1251, "step": 894500 }, { "epoch": 5.62, "learning_rate": 3.5956003315291786e-05, "loss": 3.1212, "step": 895000 }, { "epoch": 5.62, "learning_rate": 3.5948154613153174e-05, "loss": 3.1219, "step": 895500 }, { "epoch": 5.63, "learning_rate": 3.5940337305823116e-05, "loss": 3.1208, "step": 896000 }, { "epoch": 5.63, "learning_rate": 3.59324886036845e-05, "loss": 3.1168, "step": 896500 }, { "epoch": 5.63, "learning_rate": 3.592463990154588e-05, "loss": 3.1208, "step": 897000 }, { "epoch": 5.64, "learning_rate": 3.591679119940727e-05, "loss": 3.1253, "step": 897500 }, { "epoch": 5.64, "learning_rate": 3.5908942497268655e-05, "loss": 3.1178, "step": 898000 }, { "epoch": 5.64, "learning_rate": 3.5901093795130036e-05, "loss": 3.1168, "step": 898500 }, { "epoch": 5.64, "learning_rate": 3.5893245092991424e-05, "loss": 3.1242, "step": 899000 }, { "epoch": 5.65, "learning_rate": 3.588539639085281e-05, "loss": 3.1212, "step": 899500 }, { "epoch": 5.65, "learning_rate": 3.5877547688714194e-05, "loss": 3.1235, "step": 900000 }, { "epoch": 5.65, "eval_accuracy": 0.44393963714928014, "eval_loss": 3.0349690914154053, "eval_runtime": 1452.1848, "eval_samples_per_second": 57.707, "eval_steps_per_second": 5.771, "step": 900000 }, { "epoch": 5.65, "learning_rate": 3.586971468397986e-05, "loss": 3.1176, "step": 900500 }, { "epoch": 5.66, "learning_rate": 3.586186598184124e-05, "loss": 3.1254, "step": 901000 }, { "epoch": 5.66, "learning_rate": 3.5854032977106905e-05, "loss": 3.121, "step": 901500 }, { "epoch": 5.66, "learning_rate": 3.5846199972372566e-05, "loss": 3.1134, "step": 902000 }, { "epoch": 5.67, "learning_rate": 3.5838351270233954e-05, "loss": 3.1167, "step": 902500 }, { "epoch": 5.67, "learning_rate": 3.583050256809534e-05, "loss": 3.1256, "step": 903000 }, { "epoch": 5.67, "learning_rate": 3.582265386595673e-05, "loss": 3.116, "step": 903500 }, { "epoch": 5.68, "learning_rate": 3.581480516381811e-05, "loss": 3.1141, "step": 904000 }, { "epoch": 5.68, "learning_rate": 3.58069564616795e-05, "loss": 3.119, "step": 904500 }, { "epoch": 5.68, "learning_rate": 3.579910775954088e-05, "loss": 3.118, "step": 905000 }, { "epoch": 5.69, "learning_rate": 3.579125905740227e-05, "loss": 3.1161, "step": 905500 }, { "epoch": 5.69, "learning_rate": 3.578342605266794e-05, "loss": 3.1252, "step": 906000 }, { "epoch": 5.69, "learning_rate": 3.577557735052932e-05, "loss": 3.1152, "step": 906500 }, { "epoch": 5.7, "learning_rate": 3.57677286483907e-05, "loss": 3.1202, "step": 907000 }, { "epoch": 5.7, "learning_rate": 3.575987994625209e-05, "loss": 3.1158, "step": 907500 }, { "epoch": 5.7, "learning_rate": 3.5752031244113476e-05, "loss": 3.1174, "step": 908000 }, { "epoch": 5.7, "learning_rate": 3.5744182541974865e-05, "loss": 3.1235, "step": 908500 }, { "epoch": 5.71, "learning_rate": 3.5736333839836246e-05, "loss": 3.1221, "step": 909000 }, { "epoch": 5.71, "learning_rate": 3.5728485137697634e-05, "loss": 3.11, "step": 909500 }, { "epoch": 5.71, "learning_rate": 3.5720652132963295e-05, "loss": 3.12, "step": 910000 }, { "epoch": 5.72, "learning_rate": 3.5712803430824684e-05, "loss": 3.1297, "step": 910500 }, { "epoch": 5.72, "learning_rate": 3.570495472868607e-05, "loss": 3.1154, "step": 911000 }, { "epoch": 5.72, "learning_rate": 3.569710602654745e-05, "loss": 3.1189, "step": 911500 }, { "epoch": 5.73, "learning_rate": 3.5689273021813114e-05, "loss": 3.1079, "step": 912000 }, { "epoch": 5.73, "learning_rate": 3.56814243196745e-05, "loss": 3.1071, "step": 912500 }, { "epoch": 5.73, "learning_rate": 3.5673575617535884e-05, "loss": 3.1146, "step": 913000 }, { "epoch": 5.74, "learning_rate": 3.566572691539727e-05, "loss": 3.1202, "step": 913500 }, { "epoch": 5.74, "learning_rate": 3.5657878213258653e-05, "loss": 3.1138, "step": 914000 }, { "epoch": 5.74, "learning_rate": 3.565002951112004e-05, "loss": 3.1158, "step": 914500 }, { "epoch": 5.75, "learning_rate": 3.564218080898143e-05, "loss": 3.1087, "step": 915000 }, { "epoch": 5.75, "learning_rate": 3.563433210684282e-05, "loss": 3.1113, "step": 915500 }, { "epoch": 5.75, "learning_rate": 3.562649910210848e-05, "loss": 3.1149, "step": 916000 }, { "epoch": 5.75, "learning_rate": 3.561865039996986e-05, "loss": 3.1118, "step": 916500 }, { "epoch": 5.76, "learning_rate": 3.561080169783125e-05, "loss": 3.1198, "step": 917000 }, { "epoch": 5.76, "learning_rate": 3.560295299569264e-05, "loss": 3.1187, "step": 917500 }, { "epoch": 5.76, "learning_rate": 3.559510429355402e-05, "loss": 3.1155, "step": 918000 }, { "epoch": 5.77, "learning_rate": 3.55872555914154e-05, "loss": 3.119, "step": 918500 }, { "epoch": 5.77, "learning_rate": 3.557940688927679e-05, "loss": 3.1237, "step": 919000 }, { "epoch": 5.77, "learning_rate": 3.5571573884542456e-05, "loss": 3.1234, "step": 919500 }, { "epoch": 5.78, "learning_rate": 3.556372518240384e-05, "loss": 3.1141, "step": 920000 }, { "epoch": 5.78, "learning_rate": 3.5555876480265225e-05, "loss": 3.1127, "step": 920500 }, { "epoch": 5.78, "learning_rate": 3.5548027778126607e-05, "loss": 3.1195, "step": 921000 }, { "epoch": 5.79, "learning_rate": 3.5540179075987995e-05, "loss": 3.1127, "step": 921500 }, { "epoch": 5.79, "learning_rate": 3.553233037384938e-05, "loss": 3.1187, "step": 922000 }, { "epoch": 5.79, "learning_rate": 3.552448167171077e-05, "loss": 3.1171, "step": 922500 }, { "epoch": 5.8, "learning_rate": 3.551663296957215e-05, "loss": 3.1124, "step": 923000 }, { "epoch": 5.8, "learning_rate": 3.5508799964837814e-05, "loss": 3.1153, "step": 923500 }, { "epoch": 5.8, "learning_rate": 3.55009512626992e-05, "loss": 3.1116, "step": 924000 }, { "epoch": 5.8, "learning_rate": 3.549310256056059e-05, "loss": 3.1248, "step": 924500 }, { "epoch": 5.81, "learning_rate": 3.548525385842197e-05, "loss": 3.1204, "step": 925000 }, { "epoch": 5.81, "learning_rate": 3.547742085368764e-05, "loss": 3.114, "step": 925500 }, { "epoch": 5.81, "learning_rate": 3.546957215154903e-05, "loss": 3.1137, "step": 926000 }, { "epoch": 5.82, "learning_rate": 3.546172344941041e-05, "loss": 3.1114, "step": 926500 }, { "epoch": 5.82, "learning_rate": 3.545387474727179e-05, "loss": 3.1189, "step": 927000 }, { "epoch": 5.82, "learning_rate": 3.544602604513318e-05, "loss": 3.113, "step": 927500 }, { "epoch": 5.83, "learning_rate": 3.5438177342994567e-05, "loss": 3.1227, "step": 928000 }, { "epoch": 5.83, "learning_rate": 3.543034433826023e-05, "loss": 3.1147, "step": 928500 }, { "epoch": 5.83, "learning_rate": 3.542249563612161e-05, "loss": 3.1185, "step": 929000 }, { "epoch": 5.84, "learning_rate": 3.541466263138728e-05, "loss": 3.1095, "step": 929500 }, { "epoch": 5.84, "learning_rate": 3.540681392924866e-05, "loss": 3.1148, "step": 930000 }, { "epoch": 5.84, "eval_accuracy": 0.4444492814244349, "eval_loss": 3.030424118041992, "eval_runtime": 1450.8739, "eval_samples_per_second": 57.759, "eval_steps_per_second": 5.777, "step": 930000 }, { "epoch": 5.84, "learning_rate": 3.539896522711005e-05, "loss": 3.1233, "step": 930500 }, { "epoch": 5.85, "learning_rate": 3.5391116524971435e-05, "loss": 3.1114, "step": 931000 }, { "epoch": 5.85, "learning_rate": 3.5383267822832816e-05, "loss": 3.113, "step": 931500 }, { "epoch": 5.85, "learning_rate": 3.5375419120694205e-05, "loss": 3.1042, "step": 932000 }, { "epoch": 5.86, "learning_rate": 3.536757041855559e-05, "loss": 3.1191, "step": 932500 }, { "epoch": 5.86, "learning_rate": 3.5359721716416974e-05, "loss": 3.1196, "step": 933000 }, { "epoch": 5.86, "learning_rate": 3.5351873014278355e-05, "loss": 3.1168, "step": 933500 }, { "epoch": 5.86, "learning_rate": 3.5344024312139744e-05, "loss": 3.1131, "step": 934000 }, { "epoch": 5.87, "learning_rate": 3.533619130740541e-05, "loss": 3.1136, "step": 934500 }, { "epoch": 5.87, "learning_rate": 3.532834260526679e-05, "loss": 3.1154, "step": 935000 }, { "epoch": 5.87, "learning_rate": 3.532049390312818e-05, "loss": 3.121, "step": 935500 }, { "epoch": 5.88, "learning_rate": 3.531264520098956e-05, "loss": 3.1217, "step": 936000 }, { "epoch": 5.88, "learning_rate": 3.5304812196255224e-05, "loss": 3.1203, "step": 936500 }, { "epoch": 5.88, "learning_rate": 3.529696349411661e-05, "loss": 3.1081, "step": 937000 }, { "epoch": 5.89, "learning_rate": 3.5289114791978e-05, "loss": 3.112, "step": 937500 }, { "epoch": 5.89, "learning_rate": 3.528126608983939e-05, "loss": 3.1153, "step": 938000 }, { "epoch": 5.89, "learning_rate": 3.527341738770077e-05, "loss": 3.1101, "step": 938500 }, { "epoch": 5.9, "learning_rate": 3.526556868556216e-05, "loss": 3.1139, "step": 939000 }, { "epoch": 5.9, "learning_rate": 3.5257719983423546e-05, "loss": 3.1087, "step": 939500 }, { "epoch": 5.9, "learning_rate": 3.524988697868921e-05, "loss": 3.1219, "step": 940000 }, { "epoch": 5.91, "learning_rate": 3.5242038276550595e-05, "loss": 3.1152, "step": 940500 }, { "epoch": 5.91, "learning_rate": 3.523420527181626e-05, "loss": 3.1116, "step": 941000 }, { "epoch": 5.91, "learning_rate": 3.522635656967764e-05, "loss": 3.1076, "step": 941500 }, { "epoch": 5.91, "learning_rate": 3.5218507867539026e-05, "loss": 3.1176, "step": 942000 }, { "epoch": 5.92, "learning_rate": 3.5210659165400414e-05, "loss": 3.1138, "step": 942500 }, { "epoch": 5.92, "learning_rate": 3.5202810463261796e-05, "loss": 3.1122, "step": 943000 }, { "epoch": 5.92, "learning_rate": 3.519496176112318e-05, "loss": 3.1101, "step": 943500 }, { "epoch": 5.93, "learning_rate": 3.5187113058984565e-05, "loss": 3.1091, "step": 944000 }, { "epoch": 5.93, "learning_rate": 3.517926435684595e-05, "loss": 3.1126, "step": 944500 }, { "epoch": 5.93, "learning_rate": 3.517141565470734e-05, "loss": 3.1161, "step": 945000 }, { "epoch": 5.94, "learning_rate": 3.516356695256873e-05, "loss": 3.1115, "step": 945500 }, { "epoch": 5.94, "learning_rate": 3.515573394783439e-05, "loss": 3.1184, "step": 946000 }, { "epoch": 5.94, "learning_rate": 3.514788524569577e-05, "loss": 3.1149, "step": 946500 }, { "epoch": 5.95, "learning_rate": 3.5140052240961434e-05, "loss": 3.1121, "step": 947000 }, { "epoch": 5.95, "learning_rate": 3.513220353882282e-05, "loss": 3.1196, "step": 947500 }, { "epoch": 5.95, "learning_rate": 3.512435483668421e-05, "loss": 3.11, "step": 948000 }, { "epoch": 5.96, "learning_rate": 3.51165061345456e-05, "loss": 3.1127, "step": 948500 }, { "epoch": 5.96, "learning_rate": 3.510865743240698e-05, "loss": 3.1122, "step": 949000 }, { "epoch": 5.96, "learning_rate": 3.510080873026836e-05, "loss": 3.1101, "step": 949500 }, { "epoch": 5.97, "learning_rate": 3.509297572553403e-05, "loss": 3.1145, "step": 950000 }, { "epoch": 5.97, "learning_rate": 3.508512702339542e-05, "loss": 3.1119, "step": 950500 }, { "epoch": 5.97, "learning_rate": 3.50772783212568e-05, "loss": 3.1156, "step": 951000 }, { "epoch": 5.97, "learning_rate": 3.506942961911818e-05, "loss": 3.1138, "step": 951500 }, { "epoch": 5.98, "learning_rate": 3.506158091697957e-05, "loss": 3.1122, "step": 952000 }, { "epoch": 5.98, "learning_rate": 3.5053747912245236e-05, "loss": 3.1161, "step": 952500 }, { "epoch": 5.98, "learning_rate": 3.504589921010662e-05, "loss": 3.1131, "step": 953000 }, { "epoch": 5.99, "learning_rate": 3.5038050507968005e-05, "loss": 3.1137, "step": 953500 }, { "epoch": 5.99, "learning_rate": 3.503020180582939e-05, "loss": 3.1156, "step": 954000 }, { "epoch": 5.99, "learning_rate": 3.5022353103690775e-05, "loss": 3.1178, "step": 954500 }, { "epoch": 6.0, "learning_rate": 3.501450440155216e-05, "loss": 3.1124, "step": 955000 }, { "epoch": 6.0, "learning_rate": 3.5006671396817824e-05, "loss": 3.116, "step": 955500 }, { "epoch": 6.0, "learning_rate": 3.499882269467921e-05, "loss": 3.0941, "step": 956000 }, { "epoch": 6.01, "learning_rate": 3.4990973992540594e-05, "loss": 3.0927, "step": 956500 }, { "epoch": 6.01, "learning_rate": 3.498312529040198e-05, "loss": 3.0938, "step": 957000 }, { "epoch": 6.01, "learning_rate": 3.497527658826337e-05, "loss": 3.0953, "step": 957500 }, { "epoch": 6.02, "learning_rate": 3.496742788612475e-05, "loss": 3.0884, "step": 958000 }, { "epoch": 6.02, "learning_rate": 3.495957918398613e-05, "loss": 3.0934, "step": 958500 }, { "epoch": 6.02, "learning_rate": 3.495173048184752e-05, "loss": 3.0983, "step": 959000 }, { "epoch": 6.02, "learning_rate": 3.494389747711318e-05, "loss": 3.0965, "step": 959500 }, { "epoch": 6.03, "learning_rate": 3.493604877497457e-05, "loss": 3.0931, "step": 960000 }, { "epoch": 6.03, "eval_accuracy": 0.44508312021556723, "eval_loss": 3.0270707607269287, "eval_runtime": 1449.5334, "eval_samples_per_second": 57.812, "eval_steps_per_second": 5.782, "step": 960000 }, { "epoch": 6.03, "learning_rate": 3.492820007283596e-05, "loss": 3.0993, "step": 960500 }, { "epoch": 6.03, "learning_rate": 3.492035137069734e-05, "loss": 3.0948, "step": 961000 }, { "epoch": 6.04, "learning_rate": 3.491250266855873e-05, "loss": 3.1012, "step": 961500 }, { "epoch": 6.04, "learning_rate": 3.4904653966420116e-05, "loss": 3.1072, "step": 962000 }, { "epoch": 6.04, "learning_rate": 3.4896805264281504e-05, "loss": 3.1002, "step": 962500 }, { "epoch": 6.05, "learning_rate": 3.4888956562142886e-05, "loss": 3.105, "step": 963000 }, { "epoch": 6.05, "learning_rate": 3.488112355740855e-05, "loss": 3.0916, "step": 963500 }, { "epoch": 6.05, "learning_rate": 3.4873274855269935e-05, "loss": 3.1004, "step": 964000 }, { "epoch": 6.06, "learning_rate": 3.486542615313132e-05, "loss": 3.0898, "step": 964500 }, { "epoch": 6.06, "learning_rate": 3.4857577450992705e-05, "loss": 3.0985, "step": 965000 }, { "epoch": 6.06, "learning_rate": 3.484974444625837e-05, "loss": 3.1097, "step": 965500 }, { "epoch": 6.07, "learning_rate": 3.4841911441524034e-05, "loss": 3.0981, "step": 966000 }, { "epoch": 6.07, "learning_rate": 3.4834062739385416e-05, "loss": 3.0956, "step": 966500 }, { "epoch": 6.07, "learning_rate": 3.4826214037246804e-05, "loss": 3.0967, "step": 967000 }, { "epoch": 6.07, "learning_rate": 3.4818365335108185e-05, "loss": 3.0986, "step": 967500 }, { "epoch": 6.08, "learning_rate": 3.481051663296957e-05, "loss": 3.0922, "step": 968000 }, { "epoch": 6.08, "learning_rate": 3.480266793083096e-05, "loss": 3.1034, "step": 968500 }, { "epoch": 6.08, "learning_rate": 3.479481922869234e-05, "loss": 3.1039, "step": 969000 }, { "epoch": 6.09, "learning_rate": 3.4786986223958004e-05, "loss": 3.1021, "step": 969500 }, { "epoch": 6.09, "learning_rate": 3.477913752181939e-05, "loss": 3.101, "step": 970000 }, { "epoch": 6.09, "learning_rate": 3.477128881968078e-05, "loss": 3.0956, "step": 970500 }, { "epoch": 6.1, "learning_rate": 3.476344011754217e-05, "loss": 3.0954, "step": 971000 }, { "epoch": 6.1, "learning_rate": 3.475559141540355e-05, "loss": 3.1004, "step": 971500 }, { "epoch": 6.1, "learning_rate": 3.474774271326494e-05, "loss": 3.1002, "step": 972000 }, { "epoch": 6.11, "learning_rate": 3.47399097085306e-05, "loss": 3.0988, "step": 972500 }, { "epoch": 6.11, "learning_rate": 3.473206100639199e-05, "loss": 3.0993, "step": 973000 }, { "epoch": 6.11, "learning_rate": 3.472422800165765e-05, "loss": 3.1014, "step": 973500 }, { "epoch": 6.12, "learning_rate": 3.471637929951903e-05, "loss": 3.1007, "step": 974000 }, { "epoch": 6.12, "learning_rate": 3.470853059738042e-05, "loss": 3.1008, "step": 974500 }, { "epoch": 6.12, "learning_rate": 3.4700681895241806e-05, "loss": 3.0997, "step": 975000 }, { "epoch": 6.13, "learning_rate": 3.4692833193103195e-05, "loss": 3.0965, "step": 975500 }, { "epoch": 6.13, "learning_rate": 3.4684984490964576e-05, "loss": 3.0999, "step": 976000 }, { "epoch": 6.13, "learning_rate": 3.467713578882596e-05, "loss": 3.0942, "step": 976500 }, { "epoch": 6.13, "learning_rate": 3.4669287086687345e-05, "loss": 3.0999, "step": 977000 }, { "epoch": 6.14, "learning_rate": 3.4661438384548734e-05, "loss": 3.1015, "step": 977500 }, { "epoch": 6.14, "learning_rate": 3.465358968241012e-05, "loss": 3.1012, "step": 978000 }, { "epoch": 6.14, "learning_rate": 3.46457409802715e-05, "loss": 3.0963, "step": 978500 }, { "epoch": 6.15, "learning_rate": 3.463789227813289e-05, "loss": 3.0986, "step": 979000 }, { "epoch": 6.15, "learning_rate": 3.463005927339855e-05, "loss": 3.1042, "step": 979500 }, { "epoch": 6.15, "learning_rate": 3.462221057125994e-05, "loss": 3.0988, "step": 980000 }, { "epoch": 6.16, "learning_rate": 3.461436186912133e-05, "loss": 3.1008, "step": 980500 }, { "epoch": 6.16, "learning_rate": 3.460651316698271e-05, "loss": 3.0948, "step": 981000 }, { "epoch": 6.16, "learning_rate": 3.459866446484409e-05, "loss": 3.0996, "step": 981500 }, { "epoch": 6.17, "learning_rate": 3.459084715751403e-05, "loss": 3.106, "step": 982000 }, { "epoch": 6.17, "learning_rate": 3.458299845537542e-05, "loss": 3.1017, "step": 982500 }, { "epoch": 6.17, "learning_rate": 3.457516545064108e-05, "loss": 3.1043, "step": 983000 }, { "epoch": 6.18, "learning_rate": 3.456731674850247e-05, "loss": 3.0948, "step": 983500 }, { "epoch": 6.18, "learning_rate": 3.455946804636386e-05, "loss": 3.1063, "step": 984000 }, { "epoch": 6.18, "learning_rate": 3.455161934422524e-05, "loss": 3.1006, "step": 984500 }, { "epoch": 6.18, "learning_rate": 3.454377064208663e-05, "loss": 3.1012, "step": 985000 }, { "epoch": 6.19, "learning_rate": 3.453592193994801e-05, "loss": 3.1004, "step": 985500 }, { "epoch": 6.19, "learning_rate": 3.45280732378094e-05, "loss": 3.1024, "step": 986000 }, { "epoch": 6.19, "learning_rate": 3.452022453567078e-05, "loss": 3.1006, "step": 986500 }, { "epoch": 6.2, "learning_rate": 3.451239153093645e-05, "loss": 3.1018, "step": 987000 }, { "epoch": 6.2, "learning_rate": 3.450454282879783e-05, "loss": 3.0982, "step": 987500 }, { "epoch": 6.2, "learning_rate": 3.4496694126659217e-05, "loss": 3.0917, "step": 988000 }, { "epoch": 6.21, "learning_rate": 3.4488845424520605e-05, "loss": 3.1014, "step": 988500 }, { "epoch": 6.21, "learning_rate": 3.4480996722381986e-05, "loss": 3.0972, "step": 989000 }, { "epoch": 6.21, "learning_rate": 3.4473148020243374e-05, "loss": 3.0987, "step": 989500 }, { "epoch": 6.22, "learning_rate": 3.446529931810476e-05, "loss": 3.0979, "step": 990000 }, { "epoch": 6.22, "eval_accuracy": 0.44563202801479274, "eval_loss": 3.0247802734375, "eval_runtime": 1446.3701, "eval_samples_per_second": 57.939, "eval_steps_per_second": 5.795, "step": 990000 }, { "epoch": 6.22, "learning_rate": 3.4457450615966144e-05, "loss": 3.097, "step": 990500 }, { "epoch": 6.22, "learning_rate": 3.444961761123181e-05, "loss": 3.0963, "step": 991000 }, { "epoch": 6.23, "learning_rate": 3.444176890909319e-05, "loss": 3.0941, "step": 991500 }, { "epoch": 6.23, "learning_rate": 3.443392020695458e-05, "loss": 3.1043, "step": 992000 }, { "epoch": 6.23, "learning_rate": 3.442607150481596e-05, "loss": 3.1063, "step": 992500 }, { "epoch": 6.24, "learning_rate": 3.441822280267735e-05, "loss": 3.1036, "step": 993000 }, { "epoch": 6.24, "learning_rate": 3.441037410053874e-05, "loss": 3.1047, "step": 993500 }, { "epoch": 6.24, "learning_rate": 3.44025410958044e-05, "loss": 3.1004, "step": 994000 }, { "epoch": 6.24, "learning_rate": 3.439469239366578e-05, "loss": 3.0974, "step": 994500 }, { "epoch": 6.25, "learning_rate": 3.438684369152717e-05, "loss": 3.0992, "step": 995000 }, { "epoch": 6.25, "learning_rate": 3.437899498938856e-05, "loss": 3.1034, "step": 995500 }, { "epoch": 6.25, "learning_rate": 3.4371146287249946e-05, "loss": 3.1001, "step": 996000 }, { "epoch": 6.26, "learning_rate": 3.43633132825156e-05, "loss": 3.0946, "step": 996500 }, { "epoch": 6.26, "learning_rate": 3.435546458037699e-05, "loss": 3.0964, "step": 997000 }, { "epoch": 6.26, "learning_rate": 3.434761587823838e-05, "loss": 3.1001, "step": 997500 }, { "epoch": 6.27, "learning_rate": 3.4339767176099765e-05, "loss": 3.1045, "step": 998000 }, { "epoch": 6.27, "learning_rate": 3.433191847396115e-05, "loss": 3.0931, "step": 998500 }, { "epoch": 6.27, "learning_rate": 3.4324069771822535e-05, "loss": 3.0954, "step": 999000 }, { "epoch": 6.28, "learning_rate": 3.4316221069683916e-05, "loss": 3.0986, "step": 999500 }, { "epoch": 6.28, "learning_rate": 3.4308372367545304e-05, "loss": 3.0978, "step": 1000000 }, { "epoch": 6.28, "learning_rate": 3.430052366540669e-05, "loss": 3.1051, "step": 1000500 }, { "epoch": 6.29, "learning_rate": 3.4292674963268073e-05, "loss": 3.0981, "step": 1001000 }, { "epoch": 6.29, "learning_rate": 3.428482626112946e-05, "loss": 3.0955, "step": 1001500 }, { "epoch": 6.29, "learning_rate": 3.427699325639512e-05, "loss": 3.1064, "step": 1002000 }, { "epoch": 6.29, "learning_rate": 3.426914455425651e-05, "loss": 3.095, "step": 1002500 }, { "epoch": 6.3, "learning_rate": 3.42612958521179e-05, "loss": 3.1051, "step": 1003000 }, { "epoch": 6.3, "learning_rate": 3.425344714997928e-05, "loss": 3.0946, "step": 1003500 }, { "epoch": 6.3, "learning_rate": 3.424562984264922e-05, "loss": 3.0981, "step": 1004000 }, { "epoch": 6.31, "learning_rate": 3.42377811405106e-05, "loss": 3.1058, "step": 1004500 }, { "epoch": 6.31, "learning_rate": 3.422993243837199e-05, "loss": 3.0957, "step": 1005000 }, { "epoch": 6.31, "learning_rate": 3.422208373623338e-05, "loss": 3.1068, "step": 1005500 }, { "epoch": 6.32, "learning_rate": 3.421425073149904e-05, "loss": 3.0945, "step": 1006000 }, { "epoch": 6.32, "learning_rate": 3.420640202936043e-05, "loss": 3.1102, "step": 1006500 }, { "epoch": 6.32, "learning_rate": 3.419855332722181e-05, "loss": 3.096, "step": 1007000 }, { "epoch": 6.33, "learning_rate": 3.419072032248747e-05, "loss": 3.0998, "step": 1007500 }, { "epoch": 6.33, "learning_rate": 3.418287162034886e-05, "loss": 3.1008, "step": 1008000 }, { "epoch": 6.33, "learning_rate": 3.417502291821025e-05, "loss": 3.103, "step": 1008500 }, { "epoch": 6.34, "learning_rate": 3.4167174216071636e-05, "loss": 3.0973, "step": 1009000 }, { "epoch": 6.34, "learning_rate": 3.415932551393302e-05, "loss": 3.1001, "step": 1009500 }, { "epoch": 6.34, "learning_rate": 3.4151476811794406e-05, "loss": 3.0989, "step": 1010000 }, { "epoch": 6.34, "learning_rate": 3.414362810965579e-05, "loss": 3.1014, "step": 1010500 }, { "epoch": 6.35, "learning_rate": 3.4135779407517175e-05, "loss": 3.1058, "step": 1011000 }, { "epoch": 6.35, "learning_rate": 3.4127930705378557e-05, "loss": 3.1051, "step": 1011500 }, { "epoch": 6.35, "learning_rate": 3.4120097700644225e-05, "loss": 3.0944, "step": 1012000 }, { "epoch": 6.36, "learning_rate": 3.4112248998505606e-05, "loss": 3.1051, "step": 1012500 }, { "epoch": 6.36, "learning_rate": 3.4104400296366994e-05, "loss": 3.0958, "step": 1013000 }, { "epoch": 6.36, "learning_rate": 3.409655159422838e-05, "loss": 3.1063, "step": 1013500 }, { "epoch": 6.37, "learning_rate": 3.4088702892089764e-05, "loss": 3.1045, "step": 1014000 }, { "epoch": 6.37, "learning_rate": 3.408085418995115e-05, "loss": 3.0995, "step": 1014500 }, { "epoch": 6.37, "learning_rate": 3.407300548781254e-05, "loss": 3.0931, "step": 1015000 }, { "epoch": 6.38, "learning_rate": 3.40651724830782e-05, "loss": 3.1019, "step": 1015500 }, { "epoch": 6.38, "learning_rate": 3.405732378093959e-05, "loss": 3.0955, "step": 1016000 }, { "epoch": 6.38, "learning_rate": 3.404947507880097e-05, "loss": 3.1032, "step": 1016500 }, { "epoch": 6.39, "learning_rate": 3.404162637666236e-05, "loss": 3.1052, "step": 1017000 }, { "epoch": 6.39, "learning_rate": 3.403377767452374e-05, "loss": 3.0927, "step": 1017500 }, { "epoch": 6.39, "learning_rate": 3.402592897238513e-05, "loss": 3.099, "step": 1018000 }, { "epoch": 6.4, "learning_rate": 3.4018080270246516e-05, "loss": 3.0988, "step": 1018500 }, { "epoch": 6.4, "learning_rate": 3.40102315681079e-05, "loss": 3.103, "step": 1019000 }, { "epoch": 6.4, "learning_rate": 3.400239856337356e-05, "loss": 3.1029, "step": 1019500 }, { "epoch": 6.4, "learning_rate": 3.399454986123495e-05, "loss": 3.0985, "step": 1020000 }, { "epoch": 6.4, "eval_accuracy": 0.4459511637114799, "eval_loss": 3.0204365253448486, "eval_runtime": 1450.2231, "eval_samples_per_second": 57.785, "eval_steps_per_second": 5.779, "step": 1020000 }, { "epoch": 6.41, "learning_rate": 3.3986701159096335e-05, "loss": 3.0999, "step": 1020500 }, { "epoch": 6.41, "learning_rate": 3.3978852456957724e-05, "loss": 3.1027, "step": 1021000 }, { "epoch": 6.41, "learning_rate": 3.3971003754819105e-05, "loss": 3.0962, "step": 1021500 }, { "epoch": 6.42, "learning_rate": 3.3963170750084766e-05, "loss": 3.0908, "step": 1022000 }, { "epoch": 6.42, "learning_rate": 3.3955322047946154e-05, "loss": 3.1028, "step": 1022500 }, { "epoch": 6.42, "learning_rate": 3.394747334580754e-05, "loss": 3.1049, "step": 1023000 }, { "epoch": 6.43, "learning_rate": 3.3939624643668924e-05, "loss": 3.0949, "step": 1023500 }, { "epoch": 6.43, "learning_rate": 3.3931791638934585e-05, "loss": 3.0987, "step": 1024000 }, { "epoch": 6.43, "learning_rate": 3.392395863420025e-05, "loss": 3.0971, "step": 1024500 }, { "epoch": 6.44, "learning_rate": 3.3916109932061635e-05, "loss": 3.1059, "step": 1025000 }, { "epoch": 6.44, "learning_rate": 3.390826122992302e-05, "loss": 3.0972, "step": 1025500 }, { "epoch": 6.44, "learning_rate": 3.3900428225188684e-05, "loss": 3.0999, "step": 1026000 }, { "epoch": 6.45, "learning_rate": 3.389257952305007e-05, "loss": 3.0921, "step": 1026500 }, { "epoch": 6.45, "learning_rate": 3.3884730820911454e-05, "loss": 3.096, "step": 1027000 }, { "epoch": 6.45, "learning_rate": 3.387688211877284e-05, "loss": 3.0985, "step": 1027500 }, { "epoch": 6.45, "learning_rate": 3.386903341663423e-05, "loss": 3.0928, "step": 1028000 }, { "epoch": 6.46, "learning_rate": 3.386120041189989e-05, "loss": 3.0956, "step": 1028500 }, { "epoch": 6.46, "learning_rate": 3.385335170976128e-05, "loss": 3.1002, "step": 1029000 }, { "epoch": 6.46, "learning_rate": 3.384550300762266e-05, "loss": 3.1001, "step": 1029500 }, { "epoch": 6.47, "learning_rate": 3.383765430548404e-05, "loss": 3.0976, "step": 1030000 }, { "epoch": 6.47, "learning_rate": 3.382980560334543e-05, "loss": 3.1061, "step": 1030500 }, { "epoch": 6.47, "learning_rate": 3.382198829601537e-05, "loss": 3.107, "step": 1031000 }, { "epoch": 6.48, "learning_rate": 3.381413959387676e-05, "loss": 3.1032, "step": 1031500 }, { "epoch": 6.48, "learning_rate": 3.380629089173815e-05, "loss": 3.1018, "step": 1032000 }, { "epoch": 6.48, "learning_rate": 3.379844218959953e-05, "loss": 3.0959, "step": 1032500 }, { "epoch": 6.49, "learning_rate": 3.379059348746092e-05, "loss": 3.0964, "step": 1033000 }, { "epoch": 6.49, "learning_rate": 3.37827447853223e-05, "loss": 3.0956, "step": 1033500 }, { "epoch": 6.49, "learning_rate": 3.377489608318369e-05, "loss": 3.094, "step": 1034000 }, { "epoch": 6.5, "learning_rate": 3.376706307844935e-05, "loss": 3.1003, "step": 1034500 }, { "epoch": 6.5, "learning_rate": 3.375921437631073e-05, "loss": 3.0951, "step": 1035000 }, { "epoch": 6.5, "learning_rate": 3.37513813715764e-05, "loss": 3.1005, "step": 1035500 }, { "epoch": 6.51, "learning_rate": 3.3743532669437786e-05, "loss": 3.0986, "step": 1036000 }, { "epoch": 6.51, "learning_rate": 3.373568396729917e-05, "loss": 3.1101, "step": 1036500 }, { "epoch": 6.51, "learning_rate": 3.3727835265160555e-05, "loss": 3.1062, "step": 1037000 }, { "epoch": 6.51, "learning_rate": 3.371998656302194e-05, "loss": 3.0904, "step": 1037500 }, { "epoch": 6.52, "learning_rate": 3.3712137860883325e-05, "loss": 3.0949, "step": 1038000 }, { "epoch": 6.52, "learning_rate": 3.370428915874471e-05, "loss": 3.1039, "step": 1038500 }, { "epoch": 6.52, "learning_rate": 3.36964404566061e-05, "loss": 3.0958, "step": 1039000 }, { "epoch": 6.53, "learning_rate": 3.368859175446748e-05, "loss": 3.0957, "step": 1039500 }, { "epoch": 6.53, "learning_rate": 3.3680743052328864e-05, "loss": 3.0977, "step": 1040000 }, { "epoch": 6.53, "learning_rate": 3.367289435019025e-05, "loss": 3.0993, "step": 1040500 }, { "epoch": 6.54, "learning_rate": 3.366504564805164e-05, "loss": 3.0998, "step": 1041000 }, { "epoch": 6.54, "learning_rate": 3.365719694591303e-05, "loss": 3.1031, "step": 1041500 }, { "epoch": 6.54, "learning_rate": 3.364934824377441e-05, "loss": 3.0986, "step": 1042000 }, { "epoch": 6.55, "learning_rate": 3.36414995416358e-05, "loss": 3.102, "step": 1042500 }, { "epoch": 6.55, "learning_rate": 3.3633650839497186e-05, "loss": 3.1022, "step": 1043000 }, { "epoch": 6.55, "learning_rate": 3.362580213735857e-05, "loss": 3.101, "step": 1043500 }, { "epoch": 6.56, "learning_rate": 3.3617969132624235e-05, "loss": 3.1038, "step": 1044000 }, { "epoch": 6.56, "learning_rate": 3.361012043048562e-05, "loss": 3.1026, "step": 1044500 }, { "epoch": 6.56, "learning_rate": 3.3602271728347e-05, "loss": 3.0983, "step": 1045000 }, { "epoch": 6.56, "learning_rate": 3.3594423026208386e-05, "loss": 3.1019, "step": 1045500 }, { "epoch": 6.57, "learning_rate": 3.3586574324069774e-05, "loss": 3.0946, "step": 1046000 }, { "epoch": 6.57, "learning_rate": 3.3578725621931156e-05, "loss": 3.0943, "step": 1046500 }, { "epoch": 6.57, "learning_rate": 3.357089261719682e-05, "loss": 3.0984, "step": 1047000 }, { "epoch": 6.58, "learning_rate": 3.3563043915058205e-05, "loss": 3.0995, "step": 1047500 }, { "epoch": 6.58, "learning_rate": 3.355519521291959e-05, "loss": 3.0948, "step": 1048000 }, { "epoch": 6.58, "learning_rate": 3.354734651078098e-05, "loss": 3.0966, "step": 1048500 }, { "epoch": 6.59, "learning_rate": 3.353949780864237e-05, "loss": 3.1015, "step": 1049000 }, { "epoch": 6.59, "learning_rate": 3.353164910650375e-05, "loss": 3.1038, "step": 1049500 }, { "epoch": 6.59, "learning_rate": 3.352380040436513e-05, "loss": 3.101, "step": 1050000 }, { "epoch": 6.59, "eval_accuracy": 0.4464169485539236, "eval_loss": 3.017110586166382, "eval_runtime": 1450.8288, "eval_samples_per_second": 57.761, "eval_steps_per_second": 5.777, "step": 1050000 }, { "epoch": 6.6, "learning_rate": 3.351595170222652e-05, "loss": 3.0988, "step": 1050500 }, { "epoch": 6.6, "learning_rate": 3.350810300008791e-05, "loss": 3.1032, "step": 1051000 }, { "epoch": 6.6, "learning_rate": 3.350026999535357e-05, "loss": 3.1053, "step": 1051500 }, { "epoch": 6.61, "learning_rate": 3.349242129321495e-05, "loss": 3.1053, "step": 1052000 }, { "epoch": 6.61, "learning_rate": 3.348458828848062e-05, "loss": 3.0962, "step": 1052500 }, { "epoch": 6.61, "learning_rate": 3.3476739586342e-05, "loss": 3.1003, "step": 1053000 }, { "epoch": 6.61, "learning_rate": 3.346889088420339e-05, "loss": 3.09, "step": 1053500 }, { "epoch": 6.62, "learning_rate": 3.346104218206478e-05, "loss": 3.1034, "step": 1054000 }, { "epoch": 6.62, "learning_rate": 3.345319347992616e-05, "loss": 3.0989, "step": 1054500 }, { "epoch": 6.62, "learning_rate": 3.3445344777787547e-05, "loss": 3.0999, "step": 1055000 }, { "epoch": 6.63, "learning_rate": 3.343751177305321e-05, "loss": 3.0952, "step": 1055500 }, { "epoch": 6.63, "learning_rate": 3.3429663070914596e-05, "loss": 3.0993, "step": 1056000 }, { "epoch": 6.63, "learning_rate": 3.3421814368775984e-05, "loss": 3.1027, "step": 1056500 }, { "epoch": 6.64, "learning_rate": 3.3413965666637366e-05, "loss": 3.103, "step": 1057000 }, { "epoch": 6.64, "learning_rate": 3.3406116964498754e-05, "loss": 3.0952, "step": 1057500 }, { "epoch": 6.64, "learning_rate": 3.3398268262360135e-05, "loss": 3.1018, "step": 1058000 }, { "epoch": 6.65, "learning_rate": 3.339041956022152e-05, "loss": 3.1008, "step": 1058500 }, { "epoch": 6.65, "learning_rate": 3.3382570858082905e-05, "loss": 3.1045, "step": 1059000 }, { "epoch": 6.65, "learning_rate": 3.337473785334857e-05, "loss": 3.0957, "step": 1059500 }, { "epoch": 6.66, "learning_rate": 3.3366889151209954e-05, "loss": 3.1008, "step": 1060000 }, { "epoch": 6.66, "learning_rate": 3.335904044907134e-05, "loss": 3.0983, "step": 1060500 }, { "epoch": 6.66, "learning_rate": 3.335119174693273e-05, "loss": 3.096, "step": 1061000 }, { "epoch": 6.67, "learning_rate": 3.334334304479411e-05, "loss": 3.1003, "step": 1061500 }, { "epoch": 6.67, "learning_rate": 3.33354943426555e-05, "loss": 3.1014, "step": 1062000 }, { "epoch": 6.67, "learning_rate": 3.332764564051689e-05, "loss": 3.1041, "step": 1062500 }, { "epoch": 6.67, "learning_rate": 3.331981263578255e-05, "loss": 3.1005, "step": 1063000 }, { "epoch": 6.68, "learning_rate": 3.331196393364394e-05, "loss": 3.0996, "step": 1063500 }, { "epoch": 6.68, "learning_rate": 3.330411523150532e-05, "loss": 3.1001, "step": 1064000 }, { "epoch": 6.68, "learning_rate": 3.329626652936671e-05, "loss": 3.1024, "step": 1064500 }, { "epoch": 6.69, "learning_rate": 3.328843352463237e-05, "loss": 3.1025, "step": 1065000 }, { "epoch": 6.69, "learning_rate": 3.3280584822493756e-05, "loss": 3.1034, "step": 1065500 }, { "epoch": 6.69, "learning_rate": 3.3272736120355144e-05, "loss": 3.1, "step": 1066000 }, { "epoch": 6.7, "learning_rate": 3.3264887418216526e-05, "loss": 3.0994, "step": 1066500 }, { "epoch": 6.7, "learning_rate": 3.325703871607791e-05, "loss": 3.1021, "step": 1067000 }, { "epoch": 6.7, "learning_rate": 3.3249190013939295e-05, "loss": 3.09, "step": 1067500 }, { "epoch": 6.71, "learning_rate": 3.3241341311800683e-05, "loss": 3.1015, "step": 1068000 }, { "epoch": 6.71, "learning_rate": 3.323349260966207e-05, "loss": 3.0885, "step": 1068500 }, { "epoch": 6.71, "learning_rate": 3.322564390752345e-05, "loss": 3.0977, "step": 1069000 }, { "epoch": 6.72, "learning_rate": 3.3217826600193394e-05, "loss": 3.0981, "step": 1069500 }, { "epoch": 6.72, "learning_rate": 3.3209977898054776e-05, "loss": 3.0948, "step": 1070000 }, { "epoch": 6.72, "learning_rate": 3.3202129195916164e-05, "loss": 3.096, "step": 1070500 }, { "epoch": 6.72, "learning_rate": 3.319428049377755e-05, "loss": 3.0926, "step": 1071000 }, { "epoch": 6.73, "learning_rate": 3.318644748904321e-05, "loss": 3.097, "step": 1071500 }, { "epoch": 6.73, "learning_rate": 3.317861448430888e-05, "loss": 3.104, "step": 1072000 }, { "epoch": 6.73, "learning_rate": 3.317076578217026e-05, "loss": 3.0942, "step": 1072500 }, { "epoch": 6.74, "learning_rate": 3.3162917080031644e-05, "loss": 3.0904, "step": 1073000 }, { "epoch": 6.74, "learning_rate": 3.315506837789303e-05, "loss": 3.101, "step": 1073500 }, { "epoch": 6.74, "learning_rate": 3.314721967575442e-05, "loss": 3.0937, "step": 1074000 }, { "epoch": 6.75, "learning_rate": 3.31393709736158e-05, "loss": 3.102, "step": 1074500 }, { "epoch": 6.75, "learning_rate": 3.313152227147719e-05, "loss": 3.105, "step": 1075000 }, { "epoch": 6.75, "learning_rate": 3.312367356933858e-05, "loss": 3.094, "step": 1075500 }, { "epoch": 6.76, "learning_rate": 3.311584056460424e-05, "loss": 3.098, "step": 1076000 }, { "epoch": 6.76, "learning_rate": 3.310799186246563e-05, "loss": 3.0928, "step": 1076500 }, { "epoch": 6.76, "learning_rate": 3.310014316032701e-05, "loss": 3.0932, "step": 1077000 }, { "epoch": 6.77, "learning_rate": 3.30922944581884e-05, "loss": 3.1008, "step": 1077500 }, { "epoch": 6.77, "learning_rate": 3.308444575604978e-05, "loss": 3.0983, "step": 1078000 }, { "epoch": 6.77, "learning_rate": 3.3076612751315447e-05, "loss": 3.1055, "step": 1078500 }, { "epoch": 6.77, "learning_rate": 3.3068764049176835e-05, "loss": 3.0971, "step": 1079000 }, { "epoch": 6.78, "learning_rate": 3.3060915347038216e-05, "loss": 3.0955, "step": 1079500 }, { "epoch": 6.78, "learning_rate": 3.30530666448996e-05, "loss": 3.1052, "step": 1080000 }, { "epoch": 6.78, "eval_accuracy": 0.4467335879956639, "eval_loss": 3.0138120651245117, "eval_runtime": 1449.8708, "eval_samples_per_second": 57.799, "eval_steps_per_second": 5.781, "step": 1080000 }, { "epoch": 6.78, "learning_rate": 3.3045217942760985e-05, "loss": 3.0933, "step": 1080500 }, { "epoch": 6.79, "learning_rate": 3.3037369240622374e-05, "loss": 3.0929, "step": 1081000 }, { "epoch": 6.79, "learning_rate": 3.302952053848376e-05, "loss": 3.0995, "step": 1081500 }, { "epoch": 6.79, "learning_rate": 3.302167183634514e-05, "loss": 3.0916, "step": 1082000 }, { "epoch": 6.8, "learning_rate": 3.301382313420653e-05, "loss": 3.0981, "step": 1082500 }, { "epoch": 6.8, "learning_rate": 3.300597443206791e-05, "loss": 3.0939, "step": 1083000 }, { "epoch": 6.8, "learning_rate": 3.299814142733358e-05, "loss": 3.1023, "step": 1083500 }, { "epoch": 6.81, "learning_rate": 3.299029272519497e-05, "loss": 3.0926, "step": 1084000 }, { "epoch": 6.81, "learning_rate": 3.298244402305635e-05, "loss": 3.0974, "step": 1084500 }, { "epoch": 6.81, "learning_rate": 3.297459532091773e-05, "loss": 3.0974, "step": 1085000 }, { "epoch": 6.82, "learning_rate": 3.296674661877912e-05, "loss": 3.0994, "step": 1085500 }, { "epoch": 6.82, "learning_rate": 3.295891361404478e-05, "loss": 3.1009, "step": 1086000 }, { "epoch": 6.82, "learning_rate": 3.295106491190617e-05, "loss": 3.0965, "step": 1086500 }, { "epoch": 6.83, "learning_rate": 3.294321620976755e-05, "loss": 3.0966, "step": 1087000 }, { "epoch": 6.83, "learning_rate": 3.293536750762894e-05, "loss": 3.1005, "step": 1087500 }, { "epoch": 6.83, "learning_rate": 3.292751880549033e-05, "loss": 3.1046, "step": 1088000 }, { "epoch": 6.83, "learning_rate": 3.291968580075599e-05, "loss": 3.0993, "step": 1088500 }, { "epoch": 6.84, "learning_rate": 3.2911837098617376e-05, "loss": 3.0913, "step": 1089000 }, { "epoch": 6.84, "learning_rate": 3.290398839647876e-05, "loss": 3.0995, "step": 1089500 }, { "epoch": 6.84, "learning_rate": 3.2896139694340146e-05, "loss": 3.0988, "step": 1090000 }, { "epoch": 6.85, "learning_rate": 3.2888290992201534e-05, "loss": 3.1002, "step": 1090500 }, { "epoch": 6.85, "learning_rate": 3.2880442290062915e-05, "loss": 3.1006, "step": 1091000 }, { "epoch": 6.85, "learning_rate": 3.28725935879243e-05, "loss": 3.0943, "step": 1091500 }, { "epoch": 6.86, "learning_rate": 3.2864760583189965e-05, "loss": 3.0917, "step": 1092000 }, { "epoch": 6.86, "learning_rate": 3.285691188105135e-05, "loss": 3.0986, "step": 1092500 }, { "epoch": 6.86, "learning_rate": 3.2849063178912734e-05, "loss": 3.0991, "step": 1093000 }, { "epoch": 6.87, "learning_rate": 3.284121447677412e-05, "loss": 3.0987, "step": 1093500 }, { "epoch": 6.87, "learning_rate": 3.283336577463551e-05, "loss": 3.0931, "step": 1094000 }, { "epoch": 6.87, "learning_rate": 3.282551707249689e-05, "loss": 3.0993, "step": 1094500 }, { "epoch": 6.88, "learning_rate": 3.281766837035828e-05, "loss": 3.1053, "step": 1095000 }, { "epoch": 6.88, "learning_rate": 3.280981966821967e-05, "loss": 3.1045, "step": 1095500 }, { "epoch": 6.88, "learning_rate": 3.280197096608105e-05, "loss": 3.0954, "step": 1096000 }, { "epoch": 6.88, "learning_rate": 3.279412226394243e-05, "loss": 3.0943, "step": 1096500 }, { "epoch": 6.89, "learning_rate": 3.278627356180382e-05, "loss": 3.0999, "step": 1097000 }, { "epoch": 6.89, "learning_rate": 3.277842485966521e-05, "loss": 3.0923, "step": 1097500 }, { "epoch": 6.89, "learning_rate": 3.277059185493087e-05, "loss": 3.0979, "step": 1098000 }, { "epoch": 6.9, "learning_rate": 3.2762743152792257e-05, "loss": 3.0998, "step": 1098500 }, { "epoch": 6.9, "learning_rate": 3.275489445065364e-05, "loss": 3.0983, "step": 1099000 }, { "epoch": 6.9, "learning_rate": 3.2747045748515026e-05, "loss": 3.0927, "step": 1099500 }, { "epoch": 6.91, "learning_rate": 3.273921274378069e-05, "loss": 3.0976, "step": 1100000 }, { "epoch": 6.91, "learning_rate": 3.2731364041642076e-05, "loss": 3.098, "step": 1100500 }, { "epoch": 6.91, "learning_rate": 3.2723515339503464e-05, "loss": 3.0975, "step": 1101000 }, { "epoch": 6.92, "learning_rate": 3.2715666637364845e-05, "loss": 3.0941, "step": 1101500 }, { "epoch": 6.92, "learning_rate": 3.270781793522623e-05, "loss": 3.0896, "step": 1102000 }, { "epoch": 6.92, "learning_rate": 3.2700000627896175e-05, "loss": 3.0872, "step": 1102500 }, { "epoch": 6.93, "learning_rate": 3.2692151925757556e-05, "loss": 3.0925, "step": 1103000 }, { "epoch": 6.93, "learning_rate": 3.2684303223618944e-05, "loss": 3.0955, "step": 1103500 }, { "epoch": 6.93, "learning_rate": 3.267645452148033e-05, "loss": 3.0998, "step": 1104000 }, { "epoch": 6.94, "learning_rate": 3.2668605819341714e-05, "loss": 3.0923, "step": 1104500 }, { "epoch": 6.94, "learning_rate": 3.26607571172031e-05, "loss": 3.0954, "step": 1105000 }, { "epoch": 6.94, "learning_rate": 3.265290841506449e-05, "loss": 3.0889, "step": 1105500 }, { "epoch": 6.94, "learning_rate": 3.264505971292587e-05, "loss": 3.0973, "step": 1106000 }, { "epoch": 6.95, "learning_rate": 3.263722670819154e-05, "loss": 3.0914, "step": 1106500 }, { "epoch": 6.95, "learning_rate": 3.262937800605292e-05, "loss": 3.0989, "step": 1107000 }, { "epoch": 6.95, "learning_rate": 3.262152930391431e-05, "loss": 3.0987, "step": 1107500 }, { "epoch": 6.96, "learning_rate": 3.261368060177569e-05, "loss": 3.095, "step": 1108000 }, { "epoch": 6.96, "learning_rate": 3.260583189963708e-05, "loss": 3.0924, "step": 1108500 }, { "epoch": 6.96, "learning_rate": 3.259799889490274e-05, "loss": 3.0941, "step": 1109000 }, { "epoch": 6.97, "learning_rate": 3.259015019276412e-05, "loss": 3.0923, "step": 1109500 }, { "epoch": 6.97, "learning_rate": 3.258230149062551e-05, "loss": 3.1007, "step": 1110000 }, { "epoch": 6.97, "eval_accuracy": 0.44710589158977065, "eval_loss": 3.0103232860565186, "eval_runtime": 1449.8645, "eval_samples_per_second": 57.799, "eval_steps_per_second": 5.781, "step": 1110000 }, { "epoch": 6.97, "learning_rate": 3.257446848589118e-05, "loss": 3.0947, "step": 1110500 }, { "epoch": 6.98, "learning_rate": 3.256663548115684e-05, "loss": 3.0909, "step": 1111000 }, { "epoch": 6.98, "learning_rate": 3.255878677901823e-05, "loss": 3.0991, "step": 1111500 }, { "epoch": 6.98, "learning_rate": 3.255093807687961e-05, "loss": 3.0949, "step": 1112000 }, { "epoch": 6.99, "learning_rate": 3.254308937474099e-05, "loss": 3.0958, "step": 1112500 }, { "epoch": 6.99, "learning_rate": 3.253524067260238e-05, "loss": 3.0961, "step": 1113000 }, { "epoch": 6.99, "learning_rate": 3.2527391970463766e-05, "loss": 3.0931, "step": 1113500 }, { "epoch": 6.99, "learning_rate": 3.2519543268325154e-05, "loss": 3.0967, "step": 1114000 }, { "epoch": 7.0, "learning_rate": 3.2511694566186535e-05, "loss": 3.0938, "step": 1114500 }, { "epoch": 7.0, "learning_rate": 3.250384586404792e-05, "loss": 3.0908, "step": 1115000 }, { "epoch": 7.0, "learning_rate": 3.249599716190931e-05, "loss": 3.0771, "step": 1115500 }, { "epoch": 7.01, "learning_rate": 3.248814845977069e-05, "loss": 3.074, "step": 1116000 }, { "epoch": 7.01, "learning_rate": 3.248029975763208e-05, "loss": 3.0732, "step": 1116500 }, { "epoch": 7.01, "learning_rate": 3.247245105549346e-05, "loss": 3.0755, "step": 1117000 }, { "epoch": 7.02, "learning_rate": 3.246460235335485e-05, "loss": 3.076, "step": 1117500 }, { "epoch": 7.02, "learning_rate": 3.245675365121624e-05, "loss": 3.0785, "step": 1118000 }, { "epoch": 7.02, "learning_rate": 3.244890494907763e-05, "loss": 3.0796, "step": 1118500 }, { "epoch": 7.03, "learning_rate": 3.244107194434329e-05, "loss": 3.0711, "step": 1119000 }, { "epoch": 7.03, "learning_rate": 3.243322324220467e-05, "loss": 3.0685, "step": 1119500 }, { "epoch": 7.03, "learning_rate": 3.242537454006606e-05, "loss": 3.0764, "step": 1120000 }, { "epoch": 7.04, "learning_rate": 3.2417525837927446e-05, "loss": 3.0809, "step": 1120500 }, { "epoch": 7.04, "learning_rate": 3.240970853059738e-05, "loss": 3.0791, "step": 1121000 }, { "epoch": 7.04, "learning_rate": 3.240185982845877e-05, "loss": 3.0758, "step": 1121500 }, { "epoch": 7.04, "learning_rate": 3.239401112632015e-05, "loss": 3.0685, "step": 1122000 }, { "epoch": 7.05, "learning_rate": 3.238616242418154e-05, "loss": 3.084, "step": 1122500 }, { "epoch": 7.05, "learning_rate": 3.23783294194472e-05, "loss": 3.0778, "step": 1123000 }, { "epoch": 7.05, "learning_rate": 3.237048071730859e-05, "loss": 3.0745, "step": 1123500 }, { "epoch": 7.06, "learning_rate": 3.2362632015169976e-05, "loss": 3.0762, "step": 1124000 }, { "epoch": 7.06, "learning_rate": 3.235478331303136e-05, "loss": 3.0744, "step": 1124500 }, { "epoch": 7.06, "learning_rate": 3.2346934610892745e-05, "loss": 3.0704, "step": 1125000 }, { "epoch": 7.07, "learning_rate": 3.233908590875413e-05, "loss": 3.0782, "step": 1125500 }, { "epoch": 7.07, "learning_rate": 3.2331237206615514e-05, "loss": 3.0841, "step": 1126000 }, { "epoch": 7.07, "learning_rate": 3.232340420188118e-05, "loss": 3.0779, "step": 1126500 }, { "epoch": 7.08, "learning_rate": 3.2315571197146844e-05, "loss": 3.0823, "step": 1127000 }, { "epoch": 7.08, "learning_rate": 3.2307722495008225e-05, "loss": 3.0811, "step": 1127500 }, { "epoch": 7.08, "learning_rate": 3.2299873792869613e-05, "loss": 3.0781, "step": 1128000 }, { "epoch": 7.09, "learning_rate": 3.2292025090731e-05, "loss": 3.0709, "step": 1128500 }, { "epoch": 7.09, "learning_rate": 3.228417638859238e-05, "loss": 3.0773, "step": 1129000 }, { "epoch": 7.09, "learning_rate": 3.227632768645377e-05, "loss": 3.0848, "step": 1129500 }, { "epoch": 7.1, "learning_rate": 3.226847898431515e-05, "loss": 3.0771, "step": 1130000 }, { "epoch": 7.1, "learning_rate": 3.226063028217654e-05, "loss": 3.0825, "step": 1130500 }, { "epoch": 7.1, "learning_rate": 3.225278158003793e-05, "loss": 3.0785, "step": 1131000 }, { "epoch": 7.1, "learning_rate": 3.224493287789932e-05, "loss": 3.0825, "step": 1131500 }, { "epoch": 7.11, "learning_rate": 3.22370841757607e-05, "loss": 3.0831, "step": 1132000 }, { "epoch": 7.11, "learning_rate": 3.222923547362208e-05, "loss": 3.0898, "step": 1132500 }, { "epoch": 7.11, "learning_rate": 3.222138677148347e-05, "loss": 3.0834, "step": 1133000 }, { "epoch": 7.12, "learning_rate": 3.2213553766749136e-05, "loss": 3.0803, "step": 1133500 }, { "epoch": 7.12, "learning_rate": 3.220570506461052e-05, "loss": 3.0804, "step": 1134000 }, { "epoch": 7.12, "learning_rate": 3.21978563624719e-05, "loss": 3.0773, "step": 1134500 }, { "epoch": 7.13, "learning_rate": 3.219002335773757e-05, "loss": 3.0862, "step": 1135000 }, { "epoch": 7.13, "learning_rate": 3.218217465559895e-05, "loss": 3.0805, "step": 1135500 }, { "epoch": 7.13, "learning_rate": 3.2174341650864616e-05, "loss": 3.0763, "step": 1136000 }, { "epoch": 7.14, "learning_rate": 3.2166492948726004e-05, "loss": 3.0838, "step": 1136500 }, { "epoch": 7.14, "learning_rate": 3.2158644246587386e-05, "loss": 3.0837, "step": 1137000 }, { "epoch": 7.14, "learning_rate": 3.215079554444877e-05, "loss": 3.0775, "step": 1137500 }, { "epoch": 7.15, "learning_rate": 3.2142946842310155e-05, "loss": 3.0811, "step": 1138000 }, { "epoch": 7.15, "learning_rate": 3.213509814017154e-05, "loss": 3.0745, "step": 1138500 }, { "epoch": 7.15, "learning_rate": 3.212724943803293e-05, "loss": 3.0801, "step": 1139000 }, { "epoch": 7.15, "learning_rate": 3.211941643329859e-05, "loss": 3.0756, "step": 1139500 }, { "epoch": 7.16, "learning_rate": 3.2111567731159974e-05, "loss": 3.0779, "step": 1140000 }, { "epoch": 7.16, "eval_accuracy": 0.4476266873589871, "eval_loss": 3.0081255435943604, "eval_runtime": 1450.2182, "eval_samples_per_second": 57.785, "eval_steps_per_second": 5.779, "step": 1140000 }, { "epoch": 7.16, "learning_rate": 3.210371902902136e-05, "loss": 3.0836, "step": 1140500 }, { "epoch": 7.16, "learning_rate": 3.209587032688275e-05, "loss": 3.0801, "step": 1141000 }, { "epoch": 7.17, "learning_rate": 3.208802162474414e-05, "loss": 3.0794, "step": 1141500 }, { "epoch": 7.17, "learning_rate": 3.208017292260552e-05, "loss": 3.0816, "step": 1142000 }, { "epoch": 7.17, "learning_rate": 3.20723242204669e-05, "loss": 3.0846, "step": 1142500 }, { "epoch": 7.18, "learning_rate": 3.206447551832829e-05, "loss": 3.0844, "step": 1143000 }, { "epoch": 7.18, "learning_rate": 3.205662681618968e-05, "loss": 3.0814, "step": 1143500 }, { "epoch": 7.18, "learning_rate": 3.204879381145534e-05, "loss": 3.0881, "step": 1144000 }, { "epoch": 7.19, "learning_rate": 3.204094510931672e-05, "loss": 3.0893, "step": 1144500 }, { "epoch": 7.19, "learning_rate": 3.203309640717811e-05, "loss": 3.0739, "step": 1145000 }, { "epoch": 7.19, "learning_rate": 3.2025247705039496e-05, "loss": 3.082, "step": 1145500 }, { "epoch": 7.2, "learning_rate": 3.2017399002900885e-05, "loss": 3.0825, "step": 1146000 }, { "epoch": 7.2, "learning_rate": 3.200955030076227e-05, "loss": 3.0871, "step": 1146500 }, { "epoch": 7.2, "learning_rate": 3.2001701598623654e-05, "loss": 3.0803, "step": 1147000 }, { "epoch": 7.21, "learning_rate": 3.1993852896485035e-05, "loss": 3.0806, "step": 1147500 }, { "epoch": 7.21, "learning_rate": 3.1986004194346424e-05, "loss": 3.0868, "step": 1148000 }, { "epoch": 7.21, "learning_rate": 3.197815549220781e-05, "loss": 3.0882, "step": 1148500 }, { "epoch": 7.21, "learning_rate": 3.197032248747347e-05, "loss": 3.0804, "step": 1149000 }, { "epoch": 7.22, "learning_rate": 3.1962489482739134e-05, "loss": 3.0802, "step": 1149500 }, { "epoch": 7.22, "learning_rate": 3.195464078060052e-05, "loss": 3.0823, "step": 1150000 }, { "epoch": 7.22, "learning_rate": 3.1946792078461904e-05, "loss": 3.0821, "step": 1150500 }, { "epoch": 7.23, "learning_rate": 3.193894337632329e-05, "loss": 3.0856, "step": 1151000 }, { "epoch": 7.23, "learning_rate": 3.193109467418468e-05, "loss": 3.0811, "step": 1151500 }, { "epoch": 7.23, "learning_rate": 3.192324597204606e-05, "loss": 3.0879, "step": 1152000 }, { "epoch": 7.24, "learning_rate": 3.191539726990745e-05, "loss": 3.0815, "step": 1152500 }, { "epoch": 7.24, "learning_rate": 3.190754856776884e-05, "loss": 3.0792, "step": 1153000 }, { "epoch": 7.24, "learning_rate": 3.1899699865630226e-05, "loss": 3.0887, "step": 1153500 }, { "epoch": 7.25, "learning_rate": 3.189186686089589e-05, "loss": 3.0831, "step": 1154000 }, { "epoch": 7.25, "learning_rate": 3.188401815875727e-05, "loss": 3.0874, "step": 1154500 }, { "epoch": 7.25, "learning_rate": 3.187616945661866e-05, "loss": 3.0907, "step": 1155000 }, { "epoch": 7.26, "learning_rate": 3.186832075448004e-05, "loss": 3.076, "step": 1155500 }, { "epoch": 7.26, "learning_rate": 3.1860472052341426e-05, "loss": 3.083, "step": 1156000 }, { "epoch": 7.26, "learning_rate": 3.185267044241564e-05, "loss": 3.0894, "step": 1156500 }, { "epoch": 7.26, "learning_rate": 3.184482174027703e-05, "loss": 3.0819, "step": 1157000 }, { "epoch": 7.27, "learning_rate": 3.183697303813841e-05, "loss": 3.079, "step": 1157500 }, { "epoch": 7.27, "learning_rate": 3.18291243359998e-05, "loss": 3.0864, "step": 1158000 }, { "epoch": 7.27, "learning_rate": 3.182127563386119e-05, "loss": 3.0802, "step": 1158500 }, { "epoch": 7.28, "learning_rate": 3.1813426931722575e-05, "loss": 3.0762, "step": 1159000 }, { "epoch": 7.28, "learning_rate": 3.180557822958396e-05, "loss": 3.086, "step": 1159500 }, { "epoch": 7.28, "learning_rate": 3.1797729527445344e-05, "loss": 3.083, "step": 1160000 }, { "epoch": 7.29, "learning_rate": 3.1789896522711006e-05, "loss": 3.0864, "step": 1160500 }, { "epoch": 7.29, "learning_rate": 3.1782047820572394e-05, "loss": 3.0835, "step": 1161000 }, { "epoch": 7.29, "learning_rate": 3.1774199118433775e-05, "loss": 3.0862, "step": 1161500 }, { "epoch": 7.3, "learning_rate": 3.176636611369944e-05, "loss": 3.0824, "step": 1162000 }, { "epoch": 7.3, "learning_rate": 3.1758517411560825e-05, "loss": 3.0829, "step": 1162500 }, { "epoch": 7.3, "learning_rate": 3.175066870942221e-05, "loss": 3.0834, "step": 1163000 }, { "epoch": 7.31, "learning_rate": 3.1742820007283594e-05, "loss": 3.0799, "step": 1163500 }, { "epoch": 7.31, "learning_rate": 3.173497130514498e-05, "loss": 3.0842, "step": 1164000 }, { "epoch": 7.31, "learning_rate": 3.172712260300637e-05, "loss": 3.0802, "step": 1164500 }, { "epoch": 7.31, "learning_rate": 3.171927390086775e-05, "loss": 3.0818, "step": 1165000 }, { "epoch": 7.32, "learning_rate": 3.171142519872914e-05, "loss": 3.086, "step": 1165500 }, { "epoch": 7.32, "learning_rate": 3.170357649659053e-05, "loss": 3.0862, "step": 1166000 }, { "epoch": 7.32, "learning_rate": 3.169572779445191e-05, "loss": 3.0875, "step": 1166500 }, { "epoch": 7.33, "learning_rate": 3.168787909231329e-05, "loss": 3.092, "step": 1167000 }, { "epoch": 7.33, "learning_rate": 3.168003039017468e-05, "loss": 3.0788, "step": 1167500 }, { "epoch": 7.33, "learning_rate": 3.167218168803607e-05, "loss": 3.0799, "step": 1168000 }, { "epoch": 7.34, "learning_rate": 3.1664332985897455e-05, "loss": 3.0844, "step": 1168500 }, { "epoch": 7.34, "learning_rate": 3.165648428375884e-05, "loss": 3.0824, "step": 1169000 }, { "epoch": 7.34, "learning_rate": 3.16486512790245e-05, "loss": 3.0893, "step": 1169500 }, { "epoch": 7.35, "learning_rate": 3.1640818274290166e-05, "loss": 3.081, "step": 1170000 }, { "epoch": 7.35, "eval_accuracy": 0.44790255852484306, "eval_loss": 3.0053632259368896, "eval_runtime": 1449.0205, "eval_samples_per_second": 57.833, "eval_steps_per_second": 5.784, "step": 1170000 }, { "epoch": 7.35, "learning_rate": 3.163296957215155e-05, "loss": 3.0834, "step": 1170500 }, { "epoch": 7.35, "learning_rate": 3.1625120870012935e-05, "loss": 3.0838, "step": 1171000 }, { "epoch": 7.36, "learning_rate": 3.1617272167874324e-05, "loss": 3.0846, "step": 1171500 }, { "epoch": 7.36, "learning_rate": 3.160942346573571e-05, "loss": 3.0915, "step": 1172000 }, { "epoch": 7.36, "learning_rate": 3.160157476359709e-05, "loss": 3.0801, "step": 1172500 }, { "epoch": 7.37, "learning_rate": 3.159372606145848e-05, "loss": 3.0871, "step": 1173000 }, { "epoch": 7.37, "learning_rate": 3.158587735931986e-05, "loss": 3.0815, "step": 1173500 }, { "epoch": 7.37, "learning_rate": 3.157802865718125e-05, "loss": 3.0798, "step": 1174000 }, { "epoch": 7.37, "learning_rate": 3.157017995504263e-05, "loss": 3.0847, "step": 1174500 }, { "epoch": 7.38, "learning_rate": 3.156233125290402e-05, "loss": 3.0889, "step": 1175000 }, { "epoch": 7.38, "learning_rate": 3.155448255076541e-05, "loss": 3.0846, "step": 1175500 }, { "epoch": 7.38, "learning_rate": 3.1546633848626796e-05, "loss": 3.0836, "step": 1176000 }, { "epoch": 7.39, "learning_rate": 3.153881654129673e-05, "loss": 3.09, "step": 1176500 }, { "epoch": 7.39, "learning_rate": 3.153096783915812e-05, "loss": 3.081, "step": 1177000 }, { "epoch": 7.39, "learning_rate": 3.15231191370195e-05, "loss": 3.0928, "step": 1177500 }, { "epoch": 7.4, "learning_rate": 3.151527043488089e-05, "loss": 3.0808, "step": 1178000 }, { "epoch": 7.4, "learning_rate": 3.150742173274228e-05, "loss": 3.0778, "step": 1178500 }, { "epoch": 7.4, "learning_rate": 3.1499573030603665e-05, "loss": 3.0862, "step": 1179000 }, { "epoch": 7.41, "learning_rate": 3.1491740025869326e-05, "loss": 3.0893, "step": 1179500 }, { "epoch": 7.41, "learning_rate": 3.148389132373071e-05, "loss": 3.0831, "step": 1180000 }, { "epoch": 7.41, "learning_rate": 3.1476042621592096e-05, "loss": 3.082, "step": 1180500 }, { "epoch": 7.42, "learning_rate": 3.1468193919453484e-05, "loss": 3.0829, "step": 1181000 }, { "epoch": 7.42, "learning_rate": 3.1460345217314865e-05, "loss": 3.0843, "step": 1181500 }, { "epoch": 7.42, "learning_rate": 3.145251221258053e-05, "loss": 3.0848, "step": 1182000 }, { "epoch": 7.42, "learning_rate": 3.1444663510441915e-05, "loss": 3.0807, "step": 1182500 }, { "epoch": 7.43, "learning_rate": 3.14368148083033e-05, "loss": 3.0919, "step": 1183000 }, { "epoch": 7.43, "learning_rate": 3.1428966106164684e-05, "loss": 3.0831, "step": 1183500 }, { "epoch": 7.43, "learning_rate": 3.142113310143035e-05, "loss": 3.0811, "step": 1184000 }, { "epoch": 7.44, "learning_rate": 3.1413284399291734e-05, "loss": 3.0874, "step": 1184500 }, { "epoch": 7.44, "learning_rate": 3.1405435697153115e-05, "loss": 3.0834, "step": 1185000 }, { "epoch": 7.44, "learning_rate": 3.139760269241878e-05, "loss": 3.0775, "step": 1185500 }, { "epoch": 7.45, "learning_rate": 3.138975399028017e-05, "loss": 3.0811, "step": 1186000 }, { "epoch": 7.45, "learning_rate": 3.138190528814155e-05, "loss": 3.0844, "step": 1186500 }, { "epoch": 7.45, "learning_rate": 3.137405658600294e-05, "loss": 3.0811, "step": 1187000 }, { "epoch": 7.46, "learning_rate": 3.136620788386432e-05, "loss": 3.0807, "step": 1187500 }, { "epoch": 7.46, "learning_rate": 3.135835918172571e-05, "loss": 3.0812, "step": 1188000 }, { "epoch": 7.46, "learning_rate": 3.135052617699137e-05, "loss": 3.0846, "step": 1188500 }, { "epoch": 7.47, "learning_rate": 3.134267747485276e-05, "loss": 3.0812, "step": 1189000 }, { "epoch": 7.47, "learning_rate": 3.133482877271415e-05, "loss": 3.0793, "step": 1189500 }, { "epoch": 7.47, "learning_rate": 3.132698007057553e-05, "loss": 3.0822, "step": 1190000 }, { "epoch": 7.48, "learning_rate": 3.131914706584119e-05, "loss": 3.0842, "step": 1190500 }, { "epoch": 7.48, "learning_rate": 3.131129836370258e-05, "loss": 3.0858, "step": 1191000 }, { "epoch": 7.48, "learning_rate": 3.130344966156397e-05, "loss": 3.0782, "step": 1191500 }, { "epoch": 7.48, "learning_rate": 3.1295600959425355e-05, "loss": 3.0809, "step": 1192000 }, { "epoch": 7.49, "learning_rate": 3.1287752257286736e-05, "loss": 3.0891, "step": 1192500 }, { "epoch": 7.49, "learning_rate": 3.1279903555148124e-05, "loss": 3.0812, "step": 1193000 }, { "epoch": 7.49, "learning_rate": 3.1272054853009506e-05, "loss": 3.0828, "step": 1193500 }, { "epoch": 7.5, "learning_rate": 3.1264206150870894e-05, "loss": 3.0838, "step": 1194000 }, { "epoch": 7.5, "learning_rate": 3.125635744873228e-05, "loss": 3.079, "step": 1194500 }, { "epoch": 7.5, "learning_rate": 3.1248508746593663e-05, "loss": 3.0799, "step": 1195000 }, { "epoch": 7.51, "learning_rate": 3.124066004445505e-05, "loss": 3.0789, "step": 1195500 }, { "epoch": 7.51, "learning_rate": 3.123281134231644e-05, "loss": 3.0852, "step": 1196000 }, { "epoch": 7.51, "learning_rate": 3.122496264017782e-05, "loss": 3.0778, "step": 1196500 }, { "epoch": 7.52, "learning_rate": 3.12171139380392e-05, "loss": 3.0858, "step": 1197000 }, { "epoch": 7.52, "learning_rate": 3.120926523590059e-05, "loss": 3.0835, "step": 1197500 }, { "epoch": 7.52, "learning_rate": 3.120144792857053e-05, "loss": 3.0864, "step": 1198000 }, { "epoch": 7.53, "learning_rate": 3.119359922643192e-05, "loss": 3.0832, "step": 1198500 }, { "epoch": 7.53, "learning_rate": 3.118575052429331e-05, "loss": 3.0816, "step": 1199000 }, { "epoch": 7.53, "learning_rate": 3.117790182215469e-05, "loss": 3.0822, "step": 1199500 }, { "epoch": 7.53, "learning_rate": 3.117008451482463e-05, "loss": 3.0811, "step": 1200000 }, { "epoch": 7.53, "eval_accuracy": 0.4483708046280053, "eval_loss": 3.0015673637390137, "eval_runtime": 1449.3629, "eval_samples_per_second": 57.819, "eval_steps_per_second": 5.783, "step": 1200000 }, { "epoch": 7.54, "learning_rate": 3.116223581268601e-05, "loss": 3.0804, "step": 1200500 }, { "epoch": 7.54, "learning_rate": 3.11543871105474e-05, "loss": 3.08, "step": 1201000 }, { "epoch": 7.54, "learning_rate": 3.114653840840879e-05, "loss": 3.0854, "step": 1201500 }, { "epoch": 7.55, "learning_rate": 3.113868970627018e-05, "loss": 3.085, "step": 1202000 }, { "epoch": 7.55, "learning_rate": 3.113084100413156e-05, "loss": 3.0736, "step": 1202500 }, { "epoch": 7.55, "learning_rate": 3.112299230199294e-05, "loss": 3.0782, "step": 1203000 }, { "epoch": 7.56, "learning_rate": 3.111514359985433e-05, "loss": 3.084, "step": 1203500 }, { "epoch": 7.56, "learning_rate": 3.1107294897715716e-05, "loss": 3.0747, "step": 1204000 }, { "epoch": 7.56, "learning_rate": 3.1099446195577104e-05, "loss": 3.0802, "step": 1204500 }, { "epoch": 7.57, "learning_rate": 3.1091597493438485e-05, "loss": 3.0845, "step": 1205000 }, { "epoch": 7.57, "learning_rate": 3.108374879129987e-05, "loss": 3.0824, "step": 1205500 }, { "epoch": 7.57, "learning_rate": 3.107590008916126e-05, "loss": 3.0809, "step": 1206000 }, { "epoch": 7.58, "learning_rate": 3.106805138702264e-05, "loss": 3.0814, "step": 1206500 }, { "epoch": 7.58, "learning_rate": 3.106021838228831e-05, "loss": 3.0848, "step": 1207000 }, { "epoch": 7.58, "learning_rate": 3.105236968014969e-05, "loss": 3.0796, "step": 1207500 }, { "epoch": 7.58, "learning_rate": 3.1044520978011074e-05, "loss": 3.0835, "step": 1208000 }, { "epoch": 7.59, "learning_rate": 3.103667227587246e-05, "loss": 3.0839, "step": 1208500 }, { "epoch": 7.59, "learning_rate": 3.102882357373385e-05, "loss": 3.0793, "step": 1209000 }, { "epoch": 7.59, "learning_rate": 3.102097487159523e-05, "loss": 3.0859, "step": 1209500 }, { "epoch": 7.6, "learning_rate": 3.101312616945662e-05, "loss": 3.0841, "step": 1210000 }, { "epoch": 7.6, "learning_rate": 3.100527746731801e-05, "loss": 3.0785, "step": 1210500 }, { "epoch": 7.6, "learning_rate": 3.0997428765179396e-05, "loss": 3.0851, "step": 1211000 }, { "epoch": 7.61, "learning_rate": 3.098959576044506e-05, "loss": 3.0844, "step": 1211500 }, { "epoch": 7.61, "learning_rate": 3.098174705830644e-05, "loss": 3.0882, "step": 1212000 }, { "epoch": 7.61, "learning_rate": 3.0973898356167826e-05, "loss": 3.0795, "step": 1212500 }, { "epoch": 7.62, "learning_rate": 3.096604965402921e-05, "loss": 3.0908, "step": 1213000 }, { "epoch": 7.62, "learning_rate": 3.0958216649294876e-05, "loss": 3.0847, "step": 1213500 }, { "epoch": 7.62, "learning_rate": 3.0950367947156264e-05, "loss": 3.0759, "step": 1214000 }, { "epoch": 7.63, "learning_rate": 3.0942519245017645e-05, "loss": 3.0852, "step": 1214500 }, { "epoch": 7.63, "learning_rate": 3.093468624028331e-05, "loss": 3.0814, "step": 1215000 }, { "epoch": 7.63, "learning_rate": 3.0926837538144695e-05, "loss": 3.08, "step": 1215500 }, { "epoch": 7.64, "learning_rate": 3.091898883600608e-05, "loss": 3.0769, "step": 1216000 }, { "epoch": 7.64, "learning_rate": 3.0911140133867464e-05, "loss": 3.083, "step": 1216500 }, { "epoch": 7.64, "learning_rate": 3.090329143172885e-05, "loss": 3.0755, "step": 1217000 }, { "epoch": 7.64, "learning_rate": 3.0895458426994514e-05, "loss": 3.0749, "step": 1217500 }, { "epoch": 7.65, "learning_rate": 3.0887609724855895e-05, "loss": 3.0864, "step": 1218000 }, { "epoch": 7.65, "learning_rate": 3.087976102271728e-05, "loss": 3.0837, "step": 1218500 }, { "epoch": 7.65, "learning_rate": 3.087191232057867e-05, "loss": 3.0739, "step": 1219000 }, { "epoch": 7.66, "learning_rate": 3.086406361844006e-05, "loss": 3.0811, "step": 1219500 }, { "epoch": 7.66, "learning_rate": 3.085621491630144e-05, "loss": 3.0881, "step": 1220000 }, { "epoch": 7.66, "learning_rate": 3.084836621416283e-05, "loss": 3.0799, "step": 1220500 }, { "epoch": 7.67, "learning_rate": 3.0840548906832764e-05, "loss": 3.079, "step": 1221000 }, { "epoch": 7.67, "learning_rate": 3.083270020469415e-05, "loss": 3.0763, "step": 1221500 }, { "epoch": 7.67, "learning_rate": 3.082485150255554e-05, "loss": 3.0766, "step": 1222000 }, { "epoch": 7.68, "learning_rate": 3.081700280041692e-05, "loss": 3.0847, "step": 1222500 }, { "epoch": 7.68, "learning_rate": 3.080915409827831e-05, "loss": 3.0777, "step": 1223000 }, { "epoch": 7.68, "learning_rate": 3.08013053961397e-05, "loss": 3.083, "step": 1223500 }, { "epoch": 7.69, "learning_rate": 3.079347239140536e-05, "loss": 3.0758, "step": 1224000 }, { "epoch": 7.69, "learning_rate": 3.078562368926675e-05, "loss": 3.082, "step": 1224500 }, { "epoch": 7.69, "learning_rate": 3.077777498712813e-05, "loss": 3.0848, "step": 1225000 }, { "epoch": 7.69, "learning_rate": 3.0769926284989517e-05, "loss": 3.0863, "step": 1225500 }, { "epoch": 7.7, "learning_rate": 3.07620775828509e-05, "loss": 3.0877, "step": 1226000 }, { "epoch": 7.7, "learning_rate": 3.0754228880712286e-05, "loss": 3.0775, "step": 1226500 }, { "epoch": 7.7, "learning_rate": 3.0746380178573674e-05, "loss": 3.0848, "step": 1227000 }, { "epoch": 7.71, "learning_rate": 3.0738531476435056e-05, "loss": 3.0843, "step": 1227500 }, { "epoch": 7.71, "learning_rate": 3.0730682774296444e-05, "loss": 3.081, "step": 1228000 }, { "epoch": 7.71, "learning_rate": 3.072283407215783e-05, "loss": 3.0848, "step": 1228500 }, { "epoch": 7.72, "learning_rate": 3.071500106742349e-05, "loss": 3.0819, "step": 1229000 }, { "epoch": 7.72, "learning_rate": 3.070715236528488e-05, "loss": 3.0831, "step": 1229500 }, { "epoch": 7.72, "learning_rate": 3.069930366314626e-05, "loss": 3.0815, "step": 1230000 }, { "epoch": 7.72, "eval_accuracy": 0.44860091500808313, "eval_loss": 2.9992339611053467, "eval_runtime": 1449.0518, "eval_samples_per_second": 57.832, "eval_steps_per_second": 5.784, "step": 1230000 }, { "epoch": 7.73, "learning_rate": 3.0691470658411924e-05, "loss": 3.0823, "step": 1230500 }, { "epoch": 7.73, "learning_rate": 3.068362195627331e-05, "loss": 3.08, "step": 1231000 }, { "epoch": 7.73, "learning_rate": 3.0675788951538974e-05, "loss": 3.0765, "step": 1231500 }, { "epoch": 7.74, "learning_rate": 3.066794024940036e-05, "loss": 3.0828, "step": 1232000 }, { "epoch": 7.74, "learning_rate": 3.066009154726175e-05, "loss": 3.0814, "step": 1232500 }, { "epoch": 7.74, "learning_rate": 3.065224284512313e-05, "loss": 3.0818, "step": 1233000 }, { "epoch": 7.75, "learning_rate": 3.064439414298452e-05, "loss": 3.084, "step": 1233500 }, { "epoch": 7.75, "learning_rate": 3.063654544084591e-05, "loss": 3.0788, "step": 1234000 }, { "epoch": 7.75, "learning_rate": 3.062871243611157e-05, "loss": 3.0867, "step": 1234500 }, { "epoch": 7.75, "learning_rate": 3.062086373397296e-05, "loss": 3.0869, "step": 1235000 }, { "epoch": 7.76, "learning_rate": 3.061301503183434e-05, "loss": 3.0788, "step": 1235500 }, { "epoch": 7.76, "learning_rate": 3.060516632969572e-05, "loss": 3.0836, "step": 1236000 }, { "epoch": 7.76, "learning_rate": 3.059731762755711e-05, "loss": 3.0789, "step": 1236500 }, { "epoch": 7.77, "learning_rate": 3.0589468925418496e-05, "loss": 3.0817, "step": 1237000 }, { "epoch": 7.77, "learning_rate": 3.058162022327988e-05, "loss": 3.0873, "step": 1237500 }, { "epoch": 7.77, "learning_rate": 3.0573771521141265e-05, "loss": 3.09, "step": 1238000 }, { "epoch": 7.78, "learning_rate": 3.0565922819002653e-05, "loss": 3.0838, "step": 1238500 }, { "epoch": 7.78, "learning_rate": 3.055807411686404e-05, "loss": 3.08, "step": 1239000 }, { "epoch": 7.78, "learning_rate": 3.055022541472542e-05, "loss": 3.0811, "step": 1239500 }, { "epoch": 7.79, "learning_rate": 3.0542376712586804e-05, "loss": 3.082, "step": 1240000 }, { "epoch": 7.79, "learning_rate": 3.053452801044819e-05, "loss": 3.0769, "step": 1240500 }, { "epoch": 7.79, "learning_rate": 3.052667930830958e-05, "loss": 3.0859, "step": 1241000 }, { "epoch": 7.8, "learning_rate": 3.051883060617097e-05, "loss": 3.0794, "step": 1241500 }, { "epoch": 7.8, "learning_rate": 3.051098190403235e-05, "loss": 3.0787, "step": 1242000 }, { "epoch": 7.8, "learning_rate": 3.050314889929801e-05, "loss": 3.079, "step": 1242500 }, { "epoch": 7.8, "learning_rate": 3.04953001971594e-05, "loss": 3.0822, "step": 1243000 }, { "epoch": 7.81, "learning_rate": 3.0487451495020784e-05, "loss": 3.079, "step": 1243500 }, { "epoch": 7.81, "learning_rate": 3.0479602792882172e-05, "loss": 3.0826, "step": 1244000 }, { "epoch": 7.81, "learning_rate": 3.0471754090743554e-05, "loss": 3.0816, "step": 1244500 }, { "epoch": 7.82, "learning_rate": 3.0463905388604942e-05, "loss": 3.0872, "step": 1245000 }, { "epoch": 7.82, "learning_rate": 3.045608808127488e-05, "loss": 3.0818, "step": 1245500 }, { "epoch": 7.82, "learning_rate": 3.0448239379136268e-05, "loss": 3.0709, "step": 1246000 }, { "epoch": 7.83, "learning_rate": 3.0440390676997653e-05, "loss": 3.0836, "step": 1246500 }, { "epoch": 7.83, "learning_rate": 3.043254197485904e-05, "loss": 3.0783, "step": 1247000 }, { "epoch": 7.83, "learning_rate": 3.0424693272720422e-05, "loss": 3.08, "step": 1247500 }, { "epoch": 7.84, "learning_rate": 3.041684457058181e-05, "loss": 3.0761, "step": 1248000 }, { "epoch": 7.84, "learning_rate": 3.0409011565847472e-05, "loss": 3.0774, "step": 1248500 }, { "epoch": 7.84, "learning_rate": 3.040116286370886e-05, "loss": 3.0817, "step": 1249000 }, { "epoch": 7.85, "learning_rate": 3.0393314161570248e-05, "loss": 3.0831, "step": 1249500 }, { "epoch": 7.85, "learning_rate": 3.038546545943163e-05, "loss": 3.0726, "step": 1250000 }, { "epoch": 7.85, "learning_rate": 3.0377616757293014e-05, "loss": 3.079, "step": 1250500 }, { "epoch": 7.85, "learning_rate": 3.0369768055154402e-05, "loss": 3.082, "step": 1251000 }, { "epoch": 7.86, "learning_rate": 3.0361919353015787e-05, "loss": 3.0881, "step": 1251500 }, { "epoch": 7.86, "learning_rate": 3.035407065087717e-05, "loss": 3.0812, "step": 1252000 }, { "epoch": 7.86, "learning_rate": 3.0346221948738556e-05, "loss": 3.0804, "step": 1252500 }, { "epoch": 7.87, "learning_rate": 3.0338373246599945e-05, "loss": 3.0726, "step": 1253000 }, { "epoch": 7.87, "learning_rate": 3.0330540241865606e-05, "loss": 3.0848, "step": 1253500 }, { "epoch": 7.87, "learning_rate": 3.0322691539726994e-05, "loss": 3.0808, "step": 1254000 }, { "epoch": 7.88, "learning_rate": 3.0314842837588375e-05, "loss": 3.0816, "step": 1254500 }, { "epoch": 7.88, "learning_rate": 3.0306994135449764e-05, "loss": 3.0856, "step": 1255000 }, { "epoch": 7.88, "learning_rate": 3.029914543331115e-05, "loss": 3.0825, "step": 1255500 }, { "epoch": 7.89, "learning_rate": 3.0291296731172536e-05, "loss": 3.0803, "step": 1256000 }, { "epoch": 7.89, "learning_rate": 3.028344802903392e-05, "loss": 3.0827, "step": 1256500 }, { "epoch": 7.89, "learning_rate": 3.0275599326895303e-05, "loss": 3.0775, "step": 1257000 }, { "epoch": 7.9, "learning_rate": 3.0267766322160967e-05, "loss": 3.0741, "step": 1257500 }, { "epoch": 7.9, "learning_rate": 3.0259917620022355e-05, "loss": 3.0804, "step": 1258000 }, { "epoch": 7.9, "learning_rate": 3.025206891788374e-05, "loss": 3.075, "step": 1258500 }, { "epoch": 7.91, "learning_rate": 3.024422021574513e-05, "loss": 3.0757, "step": 1259000 }, { "epoch": 7.91, "learning_rate": 3.0236387211010786e-05, "loss": 3.0725, "step": 1259500 }, { "epoch": 7.91, "learning_rate": 3.0228538508872174e-05, "loss": 3.0816, "step": 1260000 }, { "epoch": 7.91, "eval_accuracy": 0.44925768668344684, "eval_loss": 2.9951493740081787, "eval_runtime": 1449.0148, "eval_samples_per_second": 57.833, "eval_steps_per_second": 5.784, "step": 1260000 }, { "epoch": 7.91, "learning_rate": 3.022068980673356e-05, "loss": 3.0826, "step": 1260500 }, { "epoch": 7.92, "learning_rate": 3.0212841104594947e-05, "loss": 3.0766, "step": 1261000 }, { "epoch": 7.92, "learning_rate": 3.020500809986061e-05, "loss": 3.0778, "step": 1261500 }, { "epoch": 7.92, "learning_rate": 3.0197159397721997e-05, "loss": 3.0798, "step": 1262000 }, { "epoch": 7.93, "learning_rate": 3.0189310695583378e-05, "loss": 3.0748, "step": 1262500 }, { "epoch": 7.93, "learning_rate": 3.0181461993444766e-05, "loss": 3.0821, "step": 1263000 }, { "epoch": 7.93, "learning_rate": 3.017361329130615e-05, "loss": 3.0821, "step": 1263500 }, { "epoch": 7.94, "learning_rate": 3.0165780286571816e-05, "loss": 3.0742, "step": 1264000 }, { "epoch": 7.94, "learning_rate": 3.01579315844332e-05, "loss": 3.0842, "step": 1264500 }, { "epoch": 7.94, "learning_rate": 3.0150082882294582e-05, "loss": 3.0834, "step": 1265000 }, { "epoch": 7.95, "learning_rate": 3.0142249877560247e-05, "loss": 3.0774, "step": 1265500 }, { "epoch": 7.95, "learning_rate": 3.013441687282591e-05, "loss": 3.0779, "step": 1266000 }, { "epoch": 7.95, "learning_rate": 3.0126568170687296e-05, "loss": 3.08, "step": 1266500 }, { "epoch": 7.96, "learning_rate": 3.0118719468548684e-05, "loss": 3.0762, "step": 1267000 }, { "epoch": 7.96, "learning_rate": 3.0110870766410066e-05, "loss": 3.0871, "step": 1267500 }, { "epoch": 7.96, "learning_rate": 3.0103022064271454e-05, "loss": 3.0767, "step": 1268000 }, { "epoch": 7.96, "learning_rate": 3.009517336213284e-05, "loss": 3.0809, "step": 1268500 }, { "epoch": 7.97, "learning_rate": 3.0087324659994227e-05, "loss": 3.0787, "step": 1269000 }, { "epoch": 7.97, "learning_rate": 3.007947595785561e-05, "loss": 3.0861, "step": 1269500 }, { "epoch": 7.97, "learning_rate": 3.007164295312127e-05, "loss": 3.0798, "step": 1270000 }, { "epoch": 7.98, "learning_rate": 3.0063794250982657e-05, "loss": 3.0813, "step": 1270500 }, { "epoch": 7.98, "learning_rate": 3.0055945548844046e-05, "loss": 3.0751, "step": 1271000 }, { "epoch": 7.98, "learning_rate": 3.004809684670543e-05, "loss": 3.0788, "step": 1271500 }, { "epoch": 7.99, "learning_rate": 3.004024814456682e-05, "loss": 3.0749, "step": 1272000 }, { "epoch": 7.99, "learning_rate": 3.00323994424282e-05, "loss": 3.0831, "step": 1272500 }, { "epoch": 7.99, "learning_rate": 3.0024566437693865e-05, "loss": 3.0801, "step": 1273000 }, { "epoch": 8.0, "learning_rate": 3.001671773555525e-05, "loss": 3.0838, "step": 1273500 }, { "epoch": 8.0, "learning_rate": 3.0008884730820914e-05, "loss": 3.0696, "step": 1274000 }, { "epoch": 8.0, "learning_rate": 3.00010360286823e-05, "loss": 3.0676, "step": 1274500 }, { "epoch": 8.01, "learning_rate": 2.9993187326543687e-05, "loss": 3.0625, "step": 1275000 }, { "epoch": 8.01, "learning_rate": 2.998533862440507e-05, "loss": 3.0684, "step": 1275500 }, { "epoch": 8.01, "learning_rate": 2.9977489922266456e-05, "loss": 3.0628, "step": 1276000 }, { "epoch": 8.02, "learning_rate": 2.996964122012784e-05, "loss": 3.0546, "step": 1276500 }, { "epoch": 8.02, "learning_rate": 2.996179251798923e-05, "loss": 3.0646, "step": 1277000 }, { "epoch": 8.02, "learning_rate": 2.995394381585061e-05, "loss": 3.0641, "step": 1277500 }, { "epoch": 8.02, "learning_rate": 2.9946095113712e-05, "loss": 3.0512, "step": 1278000 }, { "epoch": 8.03, "learning_rate": 2.9938246411573384e-05, "loss": 3.0623, "step": 1278500 }, { "epoch": 8.03, "learning_rate": 2.993039770943477e-05, "loss": 3.0523, "step": 1279000 }, { "epoch": 8.03, "learning_rate": 2.9922564704700433e-05, "loss": 3.0666, "step": 1279500 }, { "epoch": 8.04, "learning_rate": 2.9914716002561814e-05, "loss": 3.0603, "step": 1280000 }, { "epoch": 8.04, "learning_rate": 2.9906867300423203e-05, "loss": 3.0639, "step": 1280500 }, { "epoch": 8.04, "learning_rate": 2.989901859828459e-05, "loss": 3.0699, "step": 1281000 }, { "epoch": 8.05, "learning_rate": 2.9891169896145975e-05, "loss": 3.0647, "step": 1281500 }, { "epoch": 8.05, "learning_rate": 2.9883321194007357e-05, "loss": 3.0639, "step": 1282000 }, { "epoch": 8.05, "learning_rate": 2.9875472491868745e-05, "loss": 3.0568, "step": 1282500 }, { "epoch": 8.06, "learning_rate": 2.9867623789730133e-05, "loss": 3.0583, "step": 1283000 }, { "epoch": 8.06, "learning_rate": 2.9859790784995794e-05, "loss": 3.0622, "step": 1283500 }, { "epoch": 8.06, "learning_rate": 2.9851942082857183e-05, "loss": 3.0593, "step": 1284000 }, { "epoch": 8.07, "learning_rate": 2.9844109078122844e-05, "loss": 3.0656, "step": 1284500 }, { "epoch": 8.07, "learning_rate": 2.9836260375984225e-05, "loss": 3.0589, "step": 1285000 }, { "epoch": 8.07, "learning_rate": 2.9828411673845613e-05, "loss": 3.0638, "step": 1285500 }, { "epoch": 8.07, "learning_rate": 2.9820562971707e-05, "loss": 3.0641, "step": 1286000 }, { "epoch": 8.08, "learning_rate": 2.9812714269568386e-05, "loss": 3.0626, "step": 1286500 }, { "epoch": 8.08, "learning_rate": 2.9804865567429774e-05, "loss": 3.0657, "step": 1287000 }, { "epoch": 8.08, "learning_rate": 2.9797016865291156e-05, "loss": 3.0594, "step": 1287500 }, { "epoch": 8.09, "learning_rate": 2.9789183860556817e-05, "loss": 3.0656, "step": 1288000 }, { "epoch": 8.09, "learning_rate": 2.9781335158418205e-05, "loss": 3.0657, "step": 1288500 }, { "epoch": 8.09, "learning_rate": 2.9773486456279593e-05, "loss": 3.0643, "step": 1289000 }, { "epoch": 8.1, "learning_rate": 2.9765637754140978e-05, "loss": 3.0578, "step": 1289500 }, { "epoch": 8.1, "learning_rate": 2.975778905200236e-05, "loss": 3.0657, "step": 1290000 }, { "epoch": 8.1, "eval_accuracy": 0.44956057339349403, "eval_loss": 2.9942307472229004, "eval_runtime": 1450.2021, "eval_samples_per_second": 57.786, "eval_steps_per_second": 5.779, "step": 1290000 }, { "epoch": 8.1, "learning_rate": 2.9749940349863748e-05, "loss": 3.0635, "step": 1290500 }, { "epoch": 8.11, "learning_rate": 2.9742107345129412e-05, "loss": 3.0605, "step": 1291000 }, { "epoch": 8.11, "learning_rate": 2.9734258642990797e-05, "loss": 3.0704, "step": 1291500 }, { "epoch": 8.11, "learning_rate": 2.9726409940852185e-05, "loss": 3.0683, "step": 1292000 }, { "epoch": 8.12, "learning_rate": 2.9718576936117843e-05, "loss": 3.0639, "step": 1292500 }, { "epoch": 8.12, "learning_rate": 2.9710728233979228e-05, "loss": 3.0617, "step": 1293000 }, { "epoch": 8.12, "learning_rate": 2.9702895229244893e-05, "loss": 3.0544, "step": 1293500 }, { "epoch": 8.12, "learning_rate": 2.969504652710628e-05, "loss": 3.0637, "step": 1294000 }, { "epoch": 8.13, "learning_rate": 2.9687197824967666e-05, "loss": 3.0597, "step": 1294500 }, { "epoch": 8.13, "learning_rate": 2.9679349122829047e-05, "loss": 3.0668, "step": 1295000 }, { "epoch": 8.13, "learning_rate": 2.9671500420690435e-05, "loss": 3.0692, "step": 1295500 }, { "epoch": 8.14, "learning_rate": 2.966365171855182e-05, "loss": 3.0694, "step": 1296000 }, { "epoch": 8.14, "learning_rate": 2.9655803016413208e-05, "loss": 3.0647, "step": 1296500 }, { "epoch": 8.14, "learning_rate": 2.9647954314274596e-05, "loss": 3.0633, "step": 1297000 }, { "epoch": 8.15, "learning_rate": 2.9640105612135977e-05, "loss": 3.0665, "step": 1297500 }, { "epoch": 8.15, "learning_rate": 2.9632256909997362e-05, "loss": 3.0651, "step": 1298000 }, { "epoch": 8.15, "learning_rate": 2.962440820785875e-05, "loss": 3.0623, "step": 1298500 }, { "epoch": 8.16, "learning_rate": 2.961655950572014e-05, "loss": 3.0667, "step": 1299000 }, { "epoch": 8.16, "learning_rate": 2.96087265009858e-05, "loss": 3.0631, "step": 1299500 }, { "epoch": 8.16, "learning_rate": 2.960087779884718e-05, "loss": 3.0685, "step": 1300000 }, { "epoch": 8.17, "learning_rate": 2.959302909670857e-05, "loss": 3.0605, "step": 1300500 }, { "epoch": 8.17, "learning_rate": 2.9585180394569957e-05, "loss": 3.0598, "step": 1301000 }, { "epoch": 8.17, "learning_rate": 2.9577331692431342e-05, "loss": 3.0617, "step": 1301500 }, { "epoch": 8.18, "learning_rate": 2.9569482990292723e-05, "loss": 3.0692, "step": 1302000 }, { "epoch": 8.18, "learning_rate": 2.956163428815411e-05, "loss": 3.0706, "step": 1302500 }, { "epoch": 8.18, "learning_rate": 2.9553785586015496e-05, "loss": 3.0672, "step": 1303000 }, { "epoch": 8.18, "learning_rate": 2.9545936883876884e-05, "loss": 3.0626, "step": 1303500 }, { "epoch": 8.19, "learning_rate": 2.9538088181738273e-05, "loss": 3.0705, "step": 1304000 }, { "epoch": 8.19, "learning_rate": 2.953025517700393e-05, "loss": 3.0695, "step": 1304500 }, { "epoch": 8.19, "learning_rate": 2.9522422172269592e-05, "loss": 3.0723, "step": 1305000 }, { "epoch": 8.2, "learning_rate": 2.951457347013098e-05, "loss": 3.0674, "step": 1305500 }, { "epoch": 8.2, "learning_rate": 2.9506724767992365e-05, "loss": 3.0687, "step": 1306000 }, { "epoch": 8.2, "learning_rate": 2.949889176325803e-05, "loss": 3.0659, "step": 1306500 }, { "epoch": 8.21, "learning_rate": 2.9491043061119418e-05, "loss": 3.0657, "step": 1307000 }, { "epoch": 8.21, "learning_rate": 2.94831943589808e-05, "loss": 3.0651, "step": 1307500 }, { "epoch": 8.21, "learning_rate": 2.9475345656842184e-05, "loss": 3.0588, "step": 1308000 }, { "epoch": 8.22, "learning_rate": 2.9467496954703572e-05, "loss": 3.0688, "step": 1308500 }, { "epoch": 8.22, "learning_rate": 2.945964825256496e-05, "loss": 3.06, "step": 1309000 }, { "epoch": 8.22, "learning_rate": 2.9451799550426345e-05, "loss": 3.0695, "step": 1309500 }, { "epoch": 8.23, "learning_rate": 2.9443950848287726e-05, "loss": 3.0683, "step": 1310000 }, { "epoch": 8.23, "learning_rate": 2.943611784355339e-05, "loss": 3.0606, "step": 1310500 }, { "epoch": 8.23, "learning_rate": 2.9428269141414776e-05, "loss": 3.0648, "step": 1311000 }, { "epoch": 8.23, "learning_rate": 2.9420420439276164e-05, "loss": 3.0708, "step": 1311500 }, { "epoch": 8.24, "learning_rate": 2.9412571737137552e-05, "loss": 3.0672, "step": 1312000 }, { "epoch": 8.24, "learning_rate": 2.9404723034998933e-05, "loss": 3.0678, "step": 1312500 }, { "epoch": 8.24, "learning_rate": 2.9396874332860318e-05, "loss": 3.0688, "step": 1313000 }, { "epoch": 8.25, "learning_rate": 2.9389041328125983e-05, "loss": 3.0704, "step": 1313500 }, { "epoch": 8.25, "learning_rate": 2.938119262598737e-05, "loss": 3.0712, "step": 1314000 }, { "epoch": 8.25, "learning_rate": 2.9373343923848756e-05, "loss": 3.0691, "step": 1314500 }, { "epoch": 8.26, "learning_rate": 2.9365495221710137e-05, "loss": 3.0681, "step": 1315000 }, { "epoch": 8.26, "learning_rate": 2.9357646519571525e-05, "loss": 3.0662, "step": 1315500 }, { "epoch": 8.26, "learning_rate": 2.934979781743291e-05, "loss": 3.0666, "step": 1316000 }, { "epoch": 8.27, "learning_rate": 2.9341949115294298e-05, "loss": 3.0711, "step": 1316500 }, { "epoch": 8.27, "learning_rate": 2.933410041315568e-05, "loss": 3.0656, "step": 1317000 }, { "epoch": 8.27, "learning_rate": 2.9326251711017067e-05, "loss": 3.0634, "step": 1317500 }, { "epoch": 8.28, "learning_rate": 2.9318403008878452e-05, "loss": 3.0685, "step": 1318000 }, { "epoch": 8.28, "learning_rate": 2.931055430673984e-05, "loss": 3.0654, "step": 1318500 }, { "epoch": 8.28, "learning_rate": 2.930270560460123e-05, "loss": 3.0631, "step": 1319000 }, { "epoch": 8.29, "learning_rate": 2.9294872599866886e-05, "loss": 3.0641, "step": 1319500 }, { "epoch": 8.29, "learning_rate": 2.928702389772827e-05, "loss": 3.0644, "step": 1320000 }, { "epoch": 8.29, "eval_accuracy": 0.4497577425400675, "eval_loss": 2.991508960723877, "eval_runtime": 1449.1322, "eval_samples_per_second": 57.828, "eval_steps_per_second": 5.783, "step": 1320000 }, { "epoch": 8.29, "learning_rate": 2.9279190892993936e-05, "loss": 3.0679, "step": 1320500 }, { "epoch": 8.29, "learning_rate": 2.927134219085532e-05, "loss": 3.0743, "step": 1321000 }, { "epoch": 8.3, "learning_rate": 2.926349348871671e-05, "loss": 3.0621, "step": 1321500 }, { "epoch": 8.3, "learning_rate": 2.925564478657809e-05, "loss": 3.064, "step": 1322000 }, { "epoch": 8.3, "learning_rate": 2.9247811781843755e-05, "loss": 3.065, "step": 1322500 }, { "epoch": 8.31, "learning_rate": 2.923996307970514e-05, "loss": 3.0702, "step": 1323000 }, { "epoch": 8.31, "learning_rate": 2.9232114377566528e-05, "loss": 3.0617, "step": 1323500 }, { "epoch": 8.31, "learning_rate": 2.9224265675427916e-05, "loss": 3.0718, "step": 1324000 }, { "epoch": 8.32, "learning_rate": 2.9216416973289297e-05, "loss": 3.0681, "step": 1324500 }, { "epoch": 8.32, "learning_rate": 2.9208568271150682e-05, "loss": 3.0631, "step": 1325000 }, { "epoch": 8.32, "learning_rate": 2.920071956901207e-05, "loss": 3.0672, "step": 1325500 }, { "epoch": 8.33, "learning_rate": 2.9192870866873455e-05, "loss": 3.0621, "step": 1326000 }, { "epoch": 8.33, "learning_rate": 2.918503786213912e-05, "loss": 3.0694, "step": 1326500 }, { "epoch": 8.33, "learning_rate": 2.91771891600005e-05, "loss": 3.0669, "step": 1327000 }, { "epoch": 8.34, "learning_rate": 2.916934045786189e-05, "loss": 3.0725, "step": 1327500 }, { "epoch": 8.34, "learning_rate": 2.9161491755723274e-05, "loss": 3.0695, "step": 1328000 }, { "epoch": 8.34, "learning_rate": 2.9153643053584662e-05, "loss": 3.0711, "step": 1328500 }, { "epoch": 8.34, "learning_rate": 2.9145810048850323e-05, "loss": 3.067, "step": 1329000 }, { "epoch": 8.35, "learning_rate": 2.913796134671171e-05, "loss": 3.0701, "step": 1329500 }, { "epoch": 8.35, "learning_rate": 2.9130112644573093e-05, "loss": 3.07, "step": 1330000 }, { "epoch": 8.35, "learning_rate": 2.912226394243448e-05, "loss": 3.0691, "step": 1330500 }, { "epoch": 8.36, "learning_rate": 2.9114430937700142e-05, "loss": 3.0658, "step": 1331000 }, { "epoch": 8.36, "learning_rate": 2.910658223556153e-05, "loss": 3.0701, "step": 1331500 }, { "epoch": 8.36, "learning_rate": 2.9098749230827195e-05, "loss": 3.0674, "step": 1332000 }, { "epoch": 8.37, "learning_rate": 2.9090900528688577e-05, "loss": 3.0693, "step": 1332500 }, { "epoch": 8.37, "learning_rate": 2.908305182654996e-05, "loss": 3.071, "step": 1333000 }, { "epoch": 8.37, "learning_rate": 2.907520312441135e-05, "loss": 3.0692, "step": 1333500 }, { "epoch": 8.38, "learning_rate": 2.9067354422272734e-05, "loss": 3.0683, "step": 1334000 }, { "epoch": 8.38, "learning_rate": 2.9059505720134122e-05, "loss": 3.0697, "step": 1334500 }, { "epoch": 8.38, "learning_rate": 2.905167271539978e-05, "loss": 3.0717, "step": 1335000 }, { "epoch": 8.39, "learning_rate": 2.9043839710665445e-05, "loss": 3.0665, "step": 1335500 }, { "epoch": 8.39, "learning_rate": 2.903599100852683e-05, "loss": 3.0683, "step": 1336000 }, { "epoch": 8.39, "learning_rate": 2.9028158003792495e-05, "loss": 3.0713, "step": 1336500 }, { "epoch": 8.39, "learning_rate": 2.902030930165388e-05, "loss": 3.0692, "step": 1337000 }, { "epoch": 8.4, "learning_rate": 2.9012460599515267e-05, "loss": 3.067, "step": 1337500 }, { "epoch": 8.4, "learning_rate": 2.900461189737665e-05, "loss": 3.0738, "step": 1338000 }, { "epoch": 8.4, "learning_rate": 2.8996763195238037e-05, "loss": 3.07, "step": 1338500 }, { "epoch": 8.41, "learning_rate": 2.898891449309942e-05, "loss": 3.0754, "step": 1339000 }, { "epoch": 8.41, "learning_rate": 2.8981081488365086e-05, "loss": 3.0624, "step": 1339500 }, { "epoch": 8.41, "learning_rate": 2.8973232786226475e-05, "loss": 3.0705, "step": 1340000 }, { "epoch": 8.42, "learning_rate": 2.8965384084087856e-05, "loss": 3.0672, "step": 1340500 }, { "epoch": 8.42, "learning_rate": 2.895753538194924e-05, "loss": 3.0661, "step": 1341000 }, { "epoch": 8.42, "learning_rate": 2.894968667981063e-05, "loss": 3.0736, "step": 1341500 }, { "epoch": 8.43, "learning_rate": 2.8941837977672014e-05, "loss": 3.0661, "step": 1342000 }, { "epoch": 8.43, "learning_rate": 2.89339892755334e-05, "loss": 3.067, "step": 1342500 }, { "epoch": 8.43, "learning_rate": 2.8926140573394783e-05, "loss": 3.0748, "step": 1343000 }, { "epoch": 8.44, "learning_rate": 2.891829187125617e-05, "loss": 3.0762, "step": 1343500 }, { "epoch": 8.44, "learning_rate": 2.8910443169117556e-05, "loss": 3.0717, "step": 1344000 }, { "epoch": 8.44, "learning_rate": 2.8902594466978944e-05, "loss": 3.0654, "step": 1344500 }, { "epoch": 8.45, "learning_rate": 2.8894745764840325e-05, "loss": 3.067, "step": 1345000 }, { "epoch": 8.45, "learning_rate": 2.8886897062701714e-05, "loss": 3.0658, "step": 1345500 }, { "epoch": 8.45, "learning_rate": 2.8879064057967375e-05, "loss": 3.0697, "step": 1346000 }, { "epoch": 8.45, "learning_rate": 2.8871215355828763e-05, "loss": 3.0679, "step": 1346500 }, { "epoch": 8.46, "learning_rate": 2.8863366653690148e-05, "loss": 3.0645, "step": 1347000 }, { "epoch": 8.46, "learning_rate": 2.885551795155153e-05, "loss": 3.0662, "step": 1347500 }, { "epoch": 8.46, "learning_rate": 2.8847669249412917e-05, "loss": 3.0647, "step": 1348000 }, { "epoch": 8.47, "learning_rate": 2.8839836244678582e-05, "loss": 3.068, "step": 1348500 }, { "epoch": 8.47, "learning_rate": 2.8831987542539967e-05, "loss": 3.0719, "step": 1349000 }, { "epoch": 8.47, "learning_rate": 2.8824138840401355e-05, "loss": 3.0714, "step": 1349500 }, { "epoch": 8.48, "learning_rate": 2.8816290138262736e-05, "loss": 3.0605, "step": 1350000 }, { "epoch": 8.48, "eval_accuracy": 0.45017283241055306, "eval_loss": 2.98885178565979, "eval_runtime": 1452.9965, "eval_samples_per_second": 57.675, "eval_steps_per_second": 5.768, "step": 1350000 }, { "epoch": 8.48, "learning_rate": 2.8808441436124124e-05, "loss": 3.0735, "step": 1350500 }, { "epoch": 8.48, "learning_rate": 2.880059273398551e-05, "loss": 3.0678, "step": 1351000 }, { "epoch": 8.49, "learning_rate": 2.8792759729251174e-05, "loss": 3.0646, "step": 1351500 }, { "epoch": 8.49, "learning_rate": 2.878491102711256e-05, "loss": 3.0668, "step": 1352000 }, { "epoch": 8.49, "learning_rate": 2.877706232497394e-05, "loss": 3.0726, "step": 1352500 }, { "epoch": 8.5, "learning_rate": 2.8769213622835328e-05, "loss": 3.0659, "step": 1353000 }, { "epoch": 8.5, "learning_rate": 2.8761364920696716e-05, "loss": 3.0703, "step": 1353500 }, { "epoch": 8.5, "learning_rate": 2.87535162185581e-05, "loss": 3.0652, "step": 1354000 }, { "epoch": 8.5, "learning_rate": 2.8745698911228042e-05, "loss": 3.07, "step": 1354500 }, { "epoch": 8.51, "learning_rate": 2.8737850209089427e-05, "loss": 3.0668, "step": 1355000 }, { "epoch": 8.51, "learning_rate": 2.873000150695081e-05, "loss": 3.0689, "step": 1355500 }, { "epoch": 8.51, "learning_rate": 2.8722152804812197e-05, "loss": 3.0669, "step": 1356000 }, { "epoch": 8.52, "learning_rate": 2.8714304102673585e-05, "loss": 3.0688, "step": 1356500 }, { "epoch": 8.52, "learning_rate": 2.870645540053497e-05, "loss": 3.0662, "step": 1357000 }, { "epoch": 8.52, "learning_rate": 2.869860669839635e-05, "loss": 3.0643, "step": 1357500 }, { "epoch": 8.53, "learning_rate": 2.869075799625774e-05, "loss": 3.0686, "step": 1358000 }, { "epoch": 8.53, "learning_rate": 2.8682909294119127e-05, "loss": 3.0577, "step": 1358500 }, { "epoch": 8.53, "learning_rate": 2.8675060591980512e-05, "loss": 3.0609, "step": 1359000 }, { "epoch": 8.54, "learning_rate": 2.86672118898419e-05, "loss": 3.0604, "step": 1359500 }, { "epoch": 8.54, "learning_rate": 2.865936318770328e-05, "loss": 3.0747, "step": 1360000 }, { "epoch": 8.54, "learning_rate": 2.865151448556467e-05, "loss": 3.0703, "step": 1360500 }, { "epoch": 8.55, "learning_rate": 2.864368148083033e-05, "loss": 3.0668, "step": 1361000 }, { "epoch": 8.55, "learning_rate": 2.8635848476095996e-05, "loss": 3.0707, "step": 1361500 }, { "epoch": 8.55, "learning_rate": 2.8628015471361657e-05, "loss": 3.0623, "step": 1362000 }, { "epoch": 8.56, "learning_rate": 2.8620166769223045e-05, "loss": 3.0689, "step": 1362500 }, { "epoch": 8.56, "learning_rate": 2.8612318067084426e-05, "loss": 3.0725, "step": 1363000 }, { "epoch": 8.56, "learning_rate": 2.8604469364945814e-05, "loss": 3.0699, "step": 1363500 }, { "epoch": 8.56, "learning_rate": 2.85966206628072e-05, "loss": 3.0712, "step": 1364000 }, { "epoch": 8.57, "learning_rate": 2.8588771960668587e-05, "loss": 3.0672, "step": 1364500 }, { "epoch": 8.57, "learning_rate": 2.8580923258529972e-05, "loss": 3.0713, "step": 1365000 }, { "epoch": 8.57, "learning_rate": 2.8573074556391353e-05, "loss": 3.0664, "step": 1365500 }, { "epoch": 8.58, "learning_rate": 2.8565241551657018e-05, "loss": 3.0636, "step": 1366000 }, { "epoch": 8.58, "learning_rate": 2.8557392849518406e-05, "loss": 3.0681, "step": 1366500 }, { "epoch": 8.58, "learning_rate": 2.854954414737979e-05, "loss": 3.0643, "step": 1367000 }, { "epoch": 8.59, "learning_rate": 2.854169544524118e-05, "loss": 3.0685, "step": 1367500 }, { "epoch": 8.59, "learning_rate": 2.8533862440506837e-05, "loss": 3.0706, "step": 1368000 }, { "epoch": 8.59, "learning_rate": 2.8526013738368222e-05, "loss": 3.0637, "step": 1368500 }, { "epoch": 8.6, "learning_rate": 2.851816503622961e-05, "loss": 3.0747, "step": 1369000 }, { "epoch": 8.6, "learning_rate": 2.8510332031495275e-05, "loss": 3.0683, "step": 1369500 }, { "epoch": 8.6, "learning_rate": 2.850248332935666e-05, "loss": 3.0685, "step": 1370000 }, { "epoch": 8.61, "learning_rate": 2.849463462721804e-05, "loss": 3.0676, "step": 1370500 }, { "epoch": 8.61, "learning_rate": 2.848678592507943e-05, "loss": 3.07, "step": 1371000 }, { "epoch": 8.61, "learning_rate": 2.8478937222940817e-05, "loss": 3.0688, "step": 1371500 }, { "epoch": 8.61, "learning_rate": 2.847110421820648e-05, "loss": 3.0649, "step": 1372000 }, { "epoch": 8.62, "learning_rate": 2.8463255516067867e-05, "loss": 3.0735, "step": 1372500 }, { "epoch": 8.62, "learning_rate": 2.8455406813929248e-05, "loss": 3.0607, "step": 1373000 }, { "epoch": 8.62, "learning_rate": 2.844757380919491e-05, "loss": 3.0649, "step": 1373500 }, { "epoch": 8.63, "learning_rate": 2.8439725107056298e-05, "loss": 3.0696, "step": 1374000 }, { "epoch": 8.63, "learning_rate": 2.8431876404917686e-05, "loss": 3.0651, "step": 1374500 }, { "epoch": 8.63, "learning_rate": 2.842402770277907e-05, "loss": 3.0585, "step": 1375000 }, { "epoch": 8.64, "learning_rate": 2.8416179000640452e-05, "loss": 3.0635, "step": 1375500 }, { "epoch": 8.64, "learning_rate": 2.840833029850184e-05, "loss": 3.0599, "step": 1376000 }, { "epoch": 8.64, "learning_rate": 2.8400481596363228e-05, "loss": 3.0695, "step": 1376500 }, { "epoch": 8.65, "learning_rate": 2.8392632894224613e-05, "loss": 3.0612, "step": 1377000 }, { "epoch": 8.65, "learning_rate": 2.8384784192086e-05, "loss": 3.0662, "step": 1377500 }, { "epoch": 8.65, "learning_rate": 2.8376951187351662e-05, "loss": 3.0755, "step": 1378000 }, { "epoch": 8.66, "learning_rate": 2.8369102485213044e-05, "loss": 3.0684, "step": 1378500 }, { "epoch": 8.66, "learning_rate": 2.8361253783074432e-05, "loss": 3.071, "step": 1379000 }, { "epoch": 8.66, "learning_rate": 2.8353420778340097e-05, "loss": 3.0682, "step": 1379500 }, { "epoch": 8.66, "learning_rate": 2.834557207620148e-05, "loss": 3.0671, "step": 1380000 }, { "epoch": 8.66, "eval_accuracy": 0.45055065342797684, "eval_loss": 2.9864501953125, "eval_runtime": 1454.641, "eval_samples_per_second": 57.609, "eval_steps_per_second": 5.762, "step": 1380000 }, { "epoch": 8.67, "learning_rate": 2.833772337406287e-05, "loss": 3.0622, "step": 1380500 }, { "epoch": 8.67, "learning_rate": 2.832987467192425e-05, "loss": 3.0702, "step": 1381000 }, { "epoch": 8.67, "learning_rate": 2.8322025969785635e-05, "loss": 3.0579, "step": 1381500 }, { "epoch": 8.68, "learning_rate": 2.8314177267647024e-05, "loss": 3.0698, "step": 1382000 }, { "epoch": 8.68, "learning_rate": 2.8306328565508412e-05, "loss": 3.066, "step": 1382500 }, { "epoch": 8.68, "learning_rate": 2.8298479863369793e-05, "loss": 3.0659, "step": 1383000 }, { "epoch": 8.69, "learning_rate": 2.8290646858635454e-05, "loss": 3.073, "step": 1383500 }, { "epoch": 8.69, "learning_rate": 2.8282798156496843e-05, "loss": 3.0697, "step": 1384000 }, { "epoch": 8.69, "learning_rate": 2.827494945435823e-05, "loss": 3.0719, "step": 1384500 }, { "epoch": 8.7, "learning_rate": 2.8267100752219615e-05, "loss": 3.0675, "step": 1385000 }, { "epoch": 8.7, "learning_rate": 2.8259252050080997e-05, "loss": 3.0665, "step": 1385500 }, { "epoch": 8.7, "learning_rate": 2.8251403347942385e-05, "loss": 3.0756, "step": 1386000 }, { "epoch": 8.71, "learning_rate": 2.8243554645803773e-05, "loss": 3.0697, "step": 1386500 }, { "epoch": 8.71, "learning_rate": 2.8235705943665158e-05, "loss": 3.0662, "step": 1387000 }, { "epoch": 8.71, "learning_rate": 2.8227857241526546e-05, "loss": 3.062, "step": 1387500 }, { "epoch": 8.72, "learning_rate": 2.8220008539387927e-05, "loss": 3.0639, "step": 1388000 }, { "epoch": 8.72, "learning_rate": 2.8212159837249312e-05, "loss": 3.0663, "step": 1388500 }, { "epoch": 8.72, "learning_rate": 2.82043111351107e-05, "loss": 3.0656, "step": 1389000 }, { "epoch": 8.72, "learning_rate": 2.8196462432972088e-05, "loss": 3.069, "step": 1389500 }, { "epoch": 8.73, "learning_rate": 2.818862942823775e-05, "loss": 3.0654, "step": 1390000 }, { "epoch": 8.73, "learning_rate": 2.818078072609913e-05, "loss": 3.0577, "step": 1390500 }, { "epoch": 8.73, "learning_rate": 2.817293202396052e-05, "loss": 3.0704, "step": 1391000 }, { "epoch": 8.74, "learning_rate": 2.8165083321821907e-05, "loss": 3.0684, "step": 1391500 }, { "epoch": 8.74, "learning_rate": 2.8157234619683292e-05, "loss": 3.066, "step": 1392000 }, { "epoch": 8.74, "learning_rate": 2.8149385917544673e-05, "loss": 3.0631, "step": 1392500 }, { "epoch": 8.75, "learning_rate": 2.814153721540606e-05, "loss": 3.062, "step": 1393000 }, { "epoch": 8.75, "learning_rate": 2.8133688513267446e-05, "loss": 3.0649, "step": 1393500 }, { "epoch": 8.75, "learning_rate": 2.812585550853311e-05, "loss": 3.0714, "step": 1394000 }, { "epoch": 8.76, "learning_rate": 2.81180068063945e-05, "loss": 3.0651, "step": 1394500 }, { "epoch": 8.76, "learning_rate": 2.811015810425588e-05, "loss": 3.069, "step": 1395000 }, { "epoch": 8.76, "learning_rate": 2.8102309402117265e-05, "loss": 3.0677, "step": 1395500 }, { "epoch": 8.77, "learning_rate": 2.809447639738293e-05, "loss": 3.0651, "step": 1396000 }, { "epoch": 8.77, "learning_rate": 2.8086627695244315e-05, "loss": 3.0708, "step": 1396500 }, { "epoch": 8.77, "learning_rate": 2.8078778993105703e-05, "loss": 3.0741, "step": 1397000 }, { "epoch": 8.77, "learning_rate": 2.8070945988371368e-05, "loss": 3.0684, "step": 1397500 }, { "epoch": 8.78, "learning_rate": 2.806309728623275e-05, "loss": 3.0652, "step": 1398000 }, { "epoch": 8.78, "learning_rate": 2.8055248584094134e-05, "loss": 3.0643, "step": 1398500 }, { "epoch": 8.78, "learning_rate": 2.8047399881955522e-05, "loss": 3.0731, "step": 1399000 }, { "epoch": 8.79, "learning_rate": 2.803955117981691e-05, "loss": 3.0695, "step": 1399500 }, { "epoch": 8.79, "learning_rate": 2.803170247767829e-05, "loss": 3.0653, "step": 1400000 }, { "epoch": 8.79, "learning_rate": 2.8023853775539676e-05, "loss": 3.0649, "step": 1400500 }, { "epoch": 8.8, "learning_rate": 2.8016005073401064e-05, "loss": 3.073, "step": 1401000 }, { "epoch": 8.8, "learning_rate": 2.800815637126245e-05, "loss": 3.0683, "step": 1401500 }, { "epoch": 8.8, "learning_rate": 2.8000307669123837e-05, "loss": 3.0654, "step": 1402000 }, { "epoch": 8.81, "learning_rate": 2.799245896698522e-05, "loss": 3.0661, "step": 1402500 }, { "epoch": 8.81, "learning_rate": 2.7984610264846607e-05, "loss": 3.0704, "step": 1403000 }, { "epoch": 8.81, "learning_rate": 2.797676156270799e-05, "loss": 3.0609, "step": 1403500 }, { "epoch": 8.82, "learning_rate": 2.7968928557973656e-05, "loss": 3.0722, "step": 1404000 }, { "epoch": 8.82, "learning_rate": 2.7961079855835044e-05, "loss": 3.069, "step": 1404500 }, { "epoch": 8.82, "learning_rate": 2.7953231153696426e-05, "loss": 3.0725, "step": 1405000 }, { "epoch": 8.83, "learning_rate": 2.794538245155781e-05, "loss": 3.0667, "step": 1405500 }, { "epoch": 8.83, "learning_rate": 2.7937549446823475e-05, "loss": 3.0648, "step": 1406000 }, { "epoch": 8.83, "learning_rate": 2.792970074468486e-05, "loss": 3.0657, "step": 1406500 }, { "epoch": 8.83, "learning_rate": 2.7921852042546248e-05, "loss": 3.0703, "step": 1407000 }, { "epoch": 8.84, "learning_rate": 2.791400334040763e-05, "loss": 3.0699, "step": 1407500 }, { "epoch": 8.84, "learning_rate": 2.790618603307757e-05, "loss": 3.0594, "step": 1408000 }, { "epoch": 8.84, "learning_rate": 2.7898353028343232e-05, "loss": 3.0749, "step": 1408500 }, { "epoch": 8.85, "learning_rate": 2.789050432620462e-05, "loss": 3.0719, "step": 1409000 }, { "epoch": 8.85, "learning_rate": 2.7882655624066005e-05, "loss": 3.0671, "step": 1409500 }, { "epoch": 8.85, "learning_rate": 2.7874806921927393e-05, "loss": 3.0692, "step": 1410000 }, { "epoch": 8.85, "eval_accuracy": 0.45072039876436315, "eval_loss": 2.984428882598877, "eval_runtime": 1449.5456, "eval_samples_per_second": 57.812, "eval_steps_per_second": 5.782, "step": 1410000 }, { "epoch": 8.86, "learning_rate": 2.7866958219788774e-05, "loss": 3.0606, "step": 1410500 }, { "epoch": 8.86, "learning_rate": 2.7859109517650162e-05, "loss": 3.0645, "step": 1411000 }, { "epoch": 8.86, "learning_rate": 2.7851260815511547e-05, "loss": 3.0789, "step": 1411500 }, { "epoch": 8.87, "learning_rate": 2.7843412113372935e-05, "loss": 3.0696, "step": 1412000 }, { "epoch": 8.87, "learning_rate": 2.7835563411234323e-05, "loss": 3.0682, "step": 1412500 }, { "epoch": 8.87, "learning_rate": 2.7827714709095705e-05, "loss": 3.0709, "step": 1413000 }, { "epoch": 8.88, "learning_rate": 2.7819881704361366e-05, "loss": 3.069, "step": 1413500 }, { "epoch": 8.88, "learning_rate": 2.7812033002222754e-05, "loss": 3.0704, "step": 1414000 }, { "epoch": 8.88, "learning_rate": 2.780418430008414e-05, "loss": 3.0644, "step": 1414500 }, { "epoch": 8.88, "learning_rate": 2.7796335597945527e-05, "loss": 3.0653, "step": 1415000 }, { "epoch": 8.89, "learning_rate": 2.778848689580691e-05, "loss": 3.0647, "step": 1415500 }, { "epoch": 8.89, "learning_rate": 2.7780653891072573e-05, "loss": 3.0601, "step": 1416000 }, { "epoch": 8.89, "learning_rate": 2.7772805188933958e-05, "loss": 3.0623, "step": 1416500 }, { "epoch": 8.9, "learning_rate": 2.7764956486795346e-05, "loss": 3.0695, "step": 1417000 }, { "epoch": 8.9, "learning_rate": 2.7757107784656734e-05, "loss": 3.0705, "step": 1417500 }, { "epoch": 8.9, "learning_rate": 2.7749259082518116e-05, "loss": 3.0727, "step": 1418000 }, { "epoch": 8.91, "learning_rate": 2.77414103803795e-05, "loss": 3.0622, "step": 1418500 }, { "epoch": 8.91, "learning_rate": 2.773356167824089e-05, "loss": 3.0715, "step": 1419000 }, { "epoch": 8.91, "learning_rate": 2.7725712976102273e-05, "loss": 3.0687, "step": 1419500 }, { "epoch": 8.92, "learning_rate": 2.7717879971367938e-05, "loss": 3.0675, "step": 1420000 }, { "epoch": 8.92, "learning_rate": 2.771003126922932e-05, "loss": 3.0653, "step": 1420500 }, { "epoch": 8.92, "learning_rate": 2.7702182567090708e-05, "loss": 3.0635, "step": 1421000 }, { "epoch": 8.93, "learning_rate": 2.7694333864952092e-05, "loss": 3.0672, "step": 1421500 }, { "epoch": 8.93, "learning_rate": 2.7686500860217757e-05, "loss": 3.0619, "step": 1422000 }, { "epoch": 8.93, "learning_rate": 2.7678652158079145e-05, "loss": 3.0664, "step": 1422500 }, { "epoch": 8.93, "learning_rate": 2.7670819153344807e-05, "loss": 3.066, "step": 1423000 }, { "epoch": 8.94, "learning_rate": 2.7662970451206188e-05, "loss": 3.0609, "step": 1423500 }, { "epoch": 8.94, "learning_rate": 2.7655121749067576e-05, "loss": 3.0651, "step": 1424000 }, { "epoch": 8.94, "learning_rate": 2.764727304692896e-05, "loss": 3.0663, "step": 1424500 }, { "epoch": 8.95, "learning_rate": 2.763942434479035e-05, "loss": 3.0675, "step": 1425000 }, { "epoch": 8.95, "learning_rate": 2.763157564265173e-05, "loss": 3.0667, "step": 1425500 }, { "epoch": 8.95, "learning_rate": 2.762372694051312e-05, "loss": 3.0657, "step": 1426000 }, { "epoch": 8.96, "learning_rate": 2.7615878238374503e-05, "loss": 3.0684, "step": 1426500 }, { "epoch": 8.96, "learning_rate": 2.7608045233640168e-05, "loss": 3.0672, "step": 1427000 }, { "epoch": 8.96, "learning_rate": 2.7600196531501553e-05, "loss": 3.0575, "step": 1427500 }, { "epoch": 8.97, "learning_rate": 2.7592347829362934e-05, "loss": 3.0674, "step": 1428000 }, { "epoch": 8.97, "learning_rate": 2.75845148246286e-05, "loss": 3.0641, "step": 1428500 }, { "epoch": 8.97, "learning_rate": 2.7576666122489987e-05, "loss": 3.065, "step": 1429000 }, { "epoch": 8.98, "learning_rate": 2.756881742035137e-05, "loss": 3.0702, "step": 1429500 }, { "epoch": 8.98, "learning_rate": 2.756096871821276e-05, "loss": 3.063, "step": 1430000 }, { "epoch": 8.98, "learning_rate": 2.755312001607414e-05, "loss": 3.068, "step": 1430500 }, { "epoch": 8.99, "learning_rate": 2.754527131393553e-05, "loss": 3.0618, "step": 1431000 }, { "epoch": 8.99, "learning_rate": 2.7537422611796914e-05, "loss": 3.0726, "step": 1431500 }, { "epoch": 8.99, "learning_rate": 2.7529573909658302e-05, "loss": 3.0683, "step": 1432000 }, { "epoch": 8.99, "learning_rate": 2.752172520751969e-05, "loss": 3.0647, "step": 1432500 }, { "epoch": 9.0, "learning_rate": 2.7513876505381068e-05, "loss": 3.0745, "step": 1433000 }, { "epoch": 9.0, "learning_rate": 2.7506027803242456e-05, "loss": 3.0598, "step": 1433500 }, { "epoch": 9.0, "learning_rate": 2.7498179101103844e-05, "loss": 3.0488, "step": 1434000 }, { "epoch": 9.01, "learning_rate": 2.749033039896523e-05, "loss": 3.0472, "step": 1434500 }, { "epoch": 9.01, "learning_rate": 2.7482497394230894e-05, "loss": 3.0486, "step": 1435000 }, { "epoch": 9.01, "learning_rate": 2.7474648692092275e-05, "loss": 3.0518, "step": 1435500 }, { "epoch": 9.02, "learning_rate": 2.7466799989953663e-05, "loss": 3.0418, "step": 1436000 }, { "epoch": 9.02, "learning_rate": 2.7458966985219325e-05, "loss": 3.049, "step": 1436500 }, { "epoch": 9.02, "learning_rate": 2.745113398048499e-05, "loss": 3.0465, "step": 1437000 }, { "epoch": 9.03, "learning_rate": 2.7443285278346374e-05, "loss": 3.0455, "step": 1437500 }, { "epoch": 9.03, "learning_rate": 2.7435436576207756e-05, "loss": 3.0496, "step": 1438000 }, { "epoch": 9.03, "learning_rate": 2.7427587874069144e-05, "loss": 3.0518, "step": 1438500 }, { "epoch": 9.04, "learning_rate": 2.7419739171930532e-05, "loss": 3.0422, "step": 1439000 }, { "epoch": 9.04, "learning_rate": 2.7411890469791917e-05, "loss": 3.0449, "step": 1439500 }, { "epoch": 9.04, "learning_rate": 2.7404041767653305e-05, "loss": 3.0466, "step": 1440000 }, { "epoch": 9.04, "eval_accuracy": 0.4510322206673509, "eval_loss": 2.9817347526550293, "eval_runtime": 1450.1501, "eval_samples_per_second": 57.788, "eval_steps_per_second": 5.779, "step": 1440000 }, { "epoch": 9.04, "learning_rate": 2.7396208762918963e-05, "loss": 3.0511, "step": 1440500 }, { "epoch": 9.05, "learning_rate": 2.738836006078035e-05, "loss": 3.0545, "step": 1441000 }, { "epoch": 9.05, "learning_rate": 2.7380511358641736e-05, "loss": 3.0521, "step": 1441500 }, { "epoch": 9.05, "learning_rate": 2.7372662656503124e-05, "loss": 3.0483, "step": 1442000 }, { "epoch": 9.06, "learning_rate": 2.736481395436451e-05, "loss": 3.0508, "step": 1442500 }, { "epoch": 9.06, "learning_rate": 2.735696525222589e-05, "loss": 3.0532, "step": 1443000 }, { "epoch": 9.06, "learning_rate": 2.7349116550087278e-05, "loss": 3.0432, "step": 1443500 }, { "epoch": 9.07, "learning_rate": 2.7341267847948666e-05, "loss": 3.0516, "step": 1444000 }, { "epoch": 9.07, "learning_rate": 2.7333434843214327e-05, "loss": 3.0507, "step": 1444500 }, { "epoch": 9.07, "learning_rate": 2.7325586141075716e-05, "loss": 3.0504, "step": 1445000 }, { "epoch": 9.08, "learning_rate": 2.7317737438937097e-05, "loss": 3.047, "step": 1445500 }, { "epoch": 9.08, "learning_rate": 2.7309888736798485e-05, "loss": 3.0505, "step": 1446000 }, { "epoch": 9.08, "learning_rate": 2.730204003465987e-05, "loss": 3.0512, "step": 1446500 }, { "epoch": 9.09, "learning_rate": 2.7294191332521258e-05, "loss": 3.0544, "step": 1447000 }, { "epoch": 9.09, "learning_rate": 2.728634263038264e-05, "loss": 3.0588, "step": 1447500 }, { "epoch": 9.09, "learning_rate": 2.7278493928244024e-05, "loss": 3.0529, "step": 1448000 }, { "epoch": 9.1, "learning_rate": 2.7270645226105412e-05, "loss": 3.0498, "step": 1448500 }, { "epoch": 9.1, "learning_rate": 2.72627965239668e-05, "loss": 3.0559, "step": 1449000 }, { "epoch": 9.1, "learning_rate": 2.7254947821828185e-05, "loss": 3.0514, "step": 1449500 }, { "epoch": 9.1, "learning_rate": 2.7247099119689566e-05, "loss": 3.0518, "step": 1450000 }, { "epoch": 9.11, "learning_rate": 2.723926611495523e-05, "loss": 3.0543, "step": 1450500 }, { "epoch": 9.11, "learning_rate": 2.723141741281662e-05, "loss": 3.0535, "step": 1451000 }, { "epoch": 9.11, "learning_rate": 2.7223568710678004e-05, "loss": 3.0445, "step": 1451500 }, { "epoch": 9.12, "learning_rate": 2.721573570594367e-05, "loss": 3.0537, "step": 1452000 }, { "epoch": 9.12, "learning_rate": 2.7207887003805054e-05, "loss": 3.0566, "step": 1452500 }, { "epoch": 9.12, "learning_rate": 2.720005399907071e-05, "loss": 3.0512, "step": 1453000 }, { "epoch": 9.13, "learning_rate": 2.71922052969321e-05, "loss": 3.053, "step": 1453500 }, { "epoch": 9.13, "learning_rate": 2.7184356594793488e-05, "loss": 3.048, "step": 1454000 }, { "epoch": 9.13, "learning_rate": 2.7176507892654873e-05, "loss": 3.0519, "step": 1454500 }, { "epoch": 9.14, "learning_rate": 2.7168674887920537e-05, "loss": 3.0522, "step": 1455000 }, { "epoch": 9.14, "learning_rate": 2.716082618578192e-05, "loss": 3.0474, "step": 1455500 }, { "epoch": 9.14, "learning_rate": 2.7152977483643303e-05, "loss": 3.0579, "step": 1456000 }, { "epoch": 9.15, "learning_rate": 2.714512878150469e-05, "loss": 3.0515, "step": 1456500 }, { "epoch": 9.15, "learning_rate": 2.713728007936608e-05, "loss": 3.0538, "step": 1457000 }, { "epoch": 9.15, "learning_rate": 2.7129431377227464e-05, "loss": 3.0481, "step": 1457500 }, { "epoch": 9.15, "learning_rate": 2.7121582675088846e-05, "loss": 3.0515, "step": 1458000 }, { "epoch": 9.16, "learning_rate": 2.7113733972950234e-05, "loss": 3.0494, "step": 1458500 }, { "epoch": 9.16, "learning_rate": 2.7105885270811622e-05, "loss": 3.0454, "step": 1459000 }, { "epoch": 9.16, "learning_rate": 2.7098052266077283e-05, "loss": 3.0485, "step": 1459500 }, { "epoch": 9.17, "learning_rate": 2.709020356393867e-05, "loss": 3.0597, "step": 1460000 }, { "epoch": 9.17, "learning_rate": 2.7082354861800053e-05, "loss": 3.0539, "step": 1460500 }, { "epoch": 9.17, "learning_rate": 2.7074506159661438e-05, "loss": 3.0535, "step": 1461000 }, { "epoch": 9.18, "learning_rate": 2.7066657457522826e-05, "loss": 3.0455, "step": 1461500 }, { "epoch": 9.18, "learning_rate": 2.7058808755384214e-05, "loss": 3.0545, "step": 1462000 }, { "epoch": 9.18, "learning_rate": 2.7050960053245595e-05, "loss": 3.0454, "step": 1462500 }, { "epoch": 9.19, "learning_rate": 2.704311135110698e-05, "loss": 3.0557, "step": 1463000 }, { "epoch": 9.19, "learning_rate": 2.7035262648968368e-05, "loss": 3.0451, "step": 1463500 }, { "epoch": 9.19, "learning_rate": 2.7027429644234033e-05, "loss": 3.0459, "step": 1464000 }, { "epoch": 9.2, "learning_rate": 2.7019580942095418e-05, "loss": 3.0513, "step": 1464500 }, { "epoch": 9.2, "learning_rate": 2.70117322399568e-05, "loss": 3.0452, "step": 1465000 }, { "epoch": 9.2, "learning_rate": 2.7003883537818187e-05, "loss": 3.0506, "step": 1465500 }, { "epoch": 9.2, "learning_rate": 2.6996034835679572e-05, "loss": 3.053, "step": 1466000 }, { "epoch": 9.21, "learning_rate": 2.698818613354096e-05, "loss": 3.051, "step": 1466500 }, { "epoch": 9.21, "learning_rate": 2.6980337431402348e-05, "loss": 3.0555, "step": 1467000 }, { "epoch": 9.21, "learning_rate": 2.697248872926373e-05, "loss": 3.0612, "step": 1467500 }, { "epoch": 9.22, "learning_rate": 2.6964640027125114e-05, "loss": 3.0511, "step": 1468000 }, { "epoch": 9.22, "learning_rate": 2.695680702239078e-05, "loss": 3.0551, "step": 1468500 }, { "epoch": 9.22, "learning_rate": 2.6948958320252167e-05, "loss": 3.0614, "step": 1469000 }, { "epoch": 9.23, "learning_rate": 2.6941109618113552e-05, "loss": 3.0535, "step": 1469500 }, { "epoch": 9.23, "learning_rate": 2.6933260915974933e-05, "loss": 3.0605, "step": 1470000 }, { "epoch": 9.23, "eval_accuracy": 0.4513273153292462, "eval_loss": 2.9798519611358643, "eval_runtime": 1449.8694, "eval_samples_per_second": 57.799, "eval_steps_per_second": 5.781, "step": 1470000 }, { "epoch": 9.23, "learning_rate": 2.6925427911240598e-05, "loss": 3.0511, "step": 1470500 }, { "epoch": 9.24, "learning_rate": 2.6917579209101983e-05, "loss": 3.0549, "step": 1471000 }, { "epoch": 9.24, "learning_rate": 2.690973050696337e-05, "loss": 3.0537, "step": 1471500 }, { "epoch": 9.24, "learning_rate": 2.690188180482476e-05, "loss": 3.0549, "step": 1472000 }, { "epoch": 9.25, "learning_rate": 2.689403310268614e-05, "loss": 3.059, "step": 1472500 }, { "epoch": 9.25, "learning_rate": 2.68862000979518e-05, "loss": 3.0489, "step": 1473000 }, { "epoch": 9.25, "learning_rate": 2.687835139581319e-05, "loss": 3.0558, "step": 1473500 }, { "epoch": 9.26, "learning_rate": 2.6870502693674578e-05, "loss": 3.0481, "step": 1474000 }, { "epoch": 9.26, "learning_rate": 2.6862653991535963e-05, "loss": 3.0548, "step": 1474500 }, { "epoch": 9.26, "learning_rate": 2.6854805289397344e-05, "loss": 3.0598, "step": 1475000 }, { "epoch": 9.26, "learning_rate": 2.6846956587258732e-05, "loss": 3.0443, "step": 1475500 }, { "epoch": 9.27, "learning_rate": 2.6839107885120117e-05, "loss": 3.0474, "step": 1476000 }, { "epoch": 9.27, "learning_rate": 2.683127488038578e-05, "loss": 3.0551, "step": 1476500 }, { "epoch": 9.27, "learning_rate": 2.682342617824717e-05, "loss": 3.0533, "step": 1477000 }, { "epoch": 9.28, "learning_rate": 2.681557747610855e-05, "loss": 3.0545, "step": 1477500 }, { "epoch": 9.28, "learning_rate": 2.6807728773969936e-05, "loss": 3.0605, "step": 1478000 }, { "epoch": 9.28, "learning_rate": 2.6799880071831324e-05, "loss": 3.05, "step": 1478500 }, { "epoch": 9.29, "learning_rate": 2.6792031369692712e-05, "loss": 3.0541, "step": 1479000 }, { "epoch": 9.29, "learning_rate": 2.6784182667554093e-05, "loss": 3.0547, "step": 1479500 }, { "epoch": 9.29, "learning_rate": 2.6776349662819755e-05, "loss": 3.0479, "step": 1480000 }, { "epoch": 9.3, "learning_rate": 2.676851665808542e-05, "loss": 3.0514, "step": 1480500 }, { "epoch": 9.3, "learning_rate": 2.6760667955946804e-05, "loss": 3.0515, "step": 1481000 }, { "epoch": 9.3, "learning_rate": 2.6752819253808192e-05, "loss": 3.0496, "step": 1481500 }, { "epoch": 9.31, "learning_rate": 2.674497055166958e-05, "loss": 3.0581, "step": 1482000 }, { "epoch": 9.31, "learning_rate": 2.6737121849530962e-05, "loss": 3.0563, "step": 1482500 }, { "epoch": 9.31, "learning_rate": 2.6729288844796623e-05, "loss": 3.0533, "step": 1483000 }, { "epoch": 9.31, "learning_rate": 2.672144014265801e-05, "loss": 3.0635, "step": 1483500 }, { "epoch": 9.32, "learning_rate": 2.6713591440519396e-05, "loss": 3.0535, "step": 1484000 }, { "epoch": 9.32, "learning_rate": 2.6705742738380784e-05, "loss": 3.0523, "step": 1484500 }, { "epoch": 9.32, "learning_rate": 2.669790973364645e-05, "loss": 3.057, "step": 1485000 }, { "epoch": 9.33, "learning_rate": 2.669006103150783e-05, "loss": 3.055, "step": 1485500 }, { "epoch": 9.33, "learning_rate": 2.6682212329369215e-05, "loss": 3.0539, "step": 1486000 }, { "epoch": 9.33, "learning_rate": 2.6674363627230603e-05, "loss": 3.0512, "step": 1486500 }, { "epoch": 9.34, "learning_rate": 2.666651492509199e-05, "loss": 3.0581, "step": 1487000 }, { "epoch": 9.34, "learning_rate": 2.6658666222953373e-05, "loss": 3.0552, "step": 1487500 }, { "epoch": 9.34, "learning_rate": 2.6650817520814757e-05, "loss": 3.0568, "step": 1488000 }, { "epoch": 9.35, "learning_rate": 2.6642968818676146e-05, "loss": 3.0511, "step": 1488500 }, { "epoch": 9.35, "learning_rate": 2.663512011653753e-05, "loss": 3.0489, "step": 1489000 }, { "epoch": 9.35, "learning_rate": 2.6627287111803195e-05, "loss": 3.0535, "step": 1489500 }, { "epoch": 9.36, "learning_rate": 2.6619438409664576e-05, "loss": 3.0582, "step": 1490000 }, { "epoch": 9.36, "learning_rate": 2.6611589707525965e-05, "loss": 3.0563, "step": 1490500 }, { "epoch": 9.36, "learning_rate": 2.660374100538735e-05, "loss": 3.0541, "step": 1491000 }, { "epoch": 9.37, "learning_rate": 2.6595892303248737e-05, "loss": 3.0518, "step": 1491500 }, { "epoch": 9.37, "learning_rate": 2.6588074995918675e-05, "loss": 3.0545, "step": 1492000 }, { "epoch": 9.37, "learning_rate": 2.6580226293780064e-05, "loss": 3.046, "step": 1492500 }, { "epoch": 9.37, "learning_rate": 2.6572377591641445e-05, "loss": 3.0507, "step": 1493000 }, { "epoch": 9.38, "learning_rate": 2.6564528889502833e-05, "loss": 3.0668, "step": 1493500 }, { "epoch": 9.38, "learning_rate": 2.6556680187364218e-05, "loss": 3.0528, "step": 1494000 }, { "epoch": 9.38, "learning_rate": 2.6548831485225606e-05, "loss": 3.0563, "step": 1494500 }, { "epoch": 9.39, "learning_rate": 2.6540982783086987e-05, "loss": 3.0602, "step": 1495000 }, { "epoch": 9.39, "learning_rate": 2.6533134080948375e-05, "loss": 3.0583, "step": 1495500 }, { "epoch": 9.39, "learning_rate": 2.6525301076214037e-05, "loss": 3.0525, "step": 1496000 }, { "epoch": 9.4, "learning_rate": 2.6517452374075425e-05, "loss": 3.0557, "step": 1496500 }, { "epoch": 9.4, "learning_rate": 2.650960367193681e-05, "loss": 3.0599, "step": 1497000 }, { "epoch": 9.4, "learning_rate": 2.6501754969798198e-05, "loss": 3.0526, "step": 1497500 }, { "epoch": 9.41, "learning_rate": 2.649390626765958e-05, "loss": 3.0555, "step": 1498000 }, { "epoch": 9.41, "learning_rate": 2.6486057565520967e-05, "loss": 3.0572, "step": 1498500 }, { "epoch": 9.41, "learning_rate": 2.647822456078663e-05, "loss": 3.0562, "step": 1499000 }, { "epoch": 9.42, "learning_rate": 2.6470375858648017e-05, "loss": 3.0531, "step": 1499500 }, { "epoch": 9.42, "learning_rate": 2.6462527156509405e-05, "loss": 3.0492, "step": 1500000 }, { "epoch": 9.42, "eval_accuracy": 0.45171997389943824, "eval_loss": 2.977145195007324, "eval_runtime": 1450.5138, "eval_samples_per_second": 57.773, "eval_steps_per_second": 5.778, "step": 1500000 }, { "epoch": 9.42, "learning_rate": 2.6454678454370786e-05, "loss": 3.0603, "step": 1500500 }, { "epoch": 9.42, "learning_rate": 2.644682975223217e-05, "loss": 3.0494, "step": 1501000 }, { "epoch": 9.43, "learning_rate": 2.6438996747497836e-05, "loss": 3.0558, "step": 1501500 }, { "epoch": 9.43, "learning_rate": 2.643114804535922e-05, "loss": 3.0478, "step": 1502000 }, { "epoch": 9.43, "learning_rate": 2.642329934322061e-05, "loss": 3.053, "step": 1502500 }, { "epoch": 9.44, "learning_rate": 2.641545064108199e-05, "loss": 3.0565, "step": 1503000 }, { "epoch": 9.44, "learning_rate": 2.6407617636347655e-05, "loss": 3.0538, "step": 1503500 }, { "epoch": 9.44, "learning_rate": 2.639976893420904e-05, "loss": 3.0521, "step": 1504000 }, { "epoch": 9.45, "learning_rate": 2.6391920232070428e-05, "loss": 3.0512, "step": 1504500 }, { "epoch": 9.45, "learning_rate": 2.6384071529931816e-05, "loss": 3.0519, "step": 1505000 }, { "epoch": 9.45, "learning_rate": 2.6376222827793197e-05, "loss": 3.0579, "step": 1505500 }, { "epoch": 9.46, "learning_rate": 2.6368374125654582e-05, "loss": 3.0486, "step": 1506000 }, { "epoch": 9.46, "learning_rate": 2.6360541120920247e-05, "loss": 3.0511, "step": 1506500 }, { "epoch": 9.46, "learning_rate": 2.6352708116185908e-05, "loss": 3.0507, "step": 1507000 }, { "epoch": 9.47, "learning_rate": 2.6344875111451573e-05, "loss": 3.0564, "step": 1507500 }, { "epoch": 9.47, "learning_rate": 2.633702640931296e-05, "loss": 3.059, "step": 1508000 }, { "epoch": 9.47, "learning_rate": 2.6329177707174342e-05, "loss": 3.0585, "step": 1508500 }, { "epoch": 9.47, "learning_rate": 2.6321329005035727e-05, "loss": 3.0517, "step": 1509000 }, { "epoch": 9.48, "learning_rate": 2.6313480302897115e-05, "loss": 3.0567, "step": 1509500 }, { "epoch": 9.48, "learning_rate": 2.63056316007585e-05, "loss": 3.054, "step": 1510000 }, { "epoch": 9.48, "learning_rate": 2.6297782898619888e-05, "loss": 3.0523, "step": 1510500 }, { "epoch": 9.49, "learning_rate": 2.628993419648127e-05, "loss": 3.0573, "step": 1511000 }, { "epoch": 9.49, "learning_rate": 2.6282101191746934e-05, "loss": 3.0649, "step": 1511500 }, { "epoch": 9.49, "learning_rate": 2.627425248960832e-05, "loss": 3.0566, "step": 1512000 }, { "epoch": 9.5, "learning_rate": 2.6266403787469707e-05, "loss": 3.0537, "step": 1512500 }, { "epoch": 9.5, "learning_rate": 2.6258555085331095e-05, "loss": 3.0555, "step": 1513000 }, { "epoch": 9.5, "learning_rate": 2.6250706383192476e-05, "loss": 3.0593, "step": 1513500 }, { "epoch": 9.51, "learning_rate": 2.624285768105386e-05, "loss": 3.0502, "step": 1514000 }, { "epoch": 9.51, "learning_rate": 2.623500897891525e-05, "loss": 3.0577, "step": 1514500 }, { "epoch": 9.51, "learning_rate": 2.6227160276776634e-05, "loss": 3.0508, "step": 1515000 }, { "epoch": 9.52, "learning_rate": 2.6219311574638015e-05, "loss": 3.0574, "step": 1515500 }, { "epoch": 9.52, "learning_rate": 2.6211462872499404e-05, "loss": 3.0558, "step": 1516000 }, { "epoch": 9.52, "learning_rate": 2.620361417036079e-05, "loss": 3.0582, "step": 1516500 }, { "epoch": 9.53, "learning_rate": 2.6195765468222176e-05, "loss": 3.0575, "step": 1517000 }, { "epoch": 9.53, "learning_rate": 2.618793246348784e-05, "loss": 3.0506, "step": 1517500 }, { "epoch": 9.53, "learning_rate": 2.6180083761349223e-05, "loss": 3.0564, "step": 1518000 }, { "epoch": 9.53, "learning_rate": 2.617223505921061e-05, "loss": 3.0584, "step": 1518500 }, { "epoch": 9.54, "learning_rate": 2.6164386357071995e-05, "loss": 3.0512, "step": 1519000 }, { "epoch": 9.54, "learning_rate": 2.6156537654933384e-05, "loss": 3.058, "step": 1519500 }, { "epoch": 9.54, "learning_rate": 2.6148688952794768e-05, "loss": 3.0601, "step": 1520000 }, { "epoch": 9.55, "learning_rate": 2.6140855948060426e-05, "loss": 3.0594, "step": 1520500 }, { "epoch": 9.55, "learning_rate": 2.6133007245921814e-05, "loss": 3.0544, "step": 1521000 }, { "epoch": 9.55, "learning_rate": 2.6125158543783202e-05, "loss": 3.0531, "step": 1521500 }, { "epoch": 9.56, "learning_rate": 2.6117325539048864e-05, "loss": 3.0515, "step": 1522000 }, { "epoch": 9.56, "learning_rate": 2.6109476836910252e-05, "loss": 3.0498, "step": 1522500 }, { "epoch": 9.56, "learning_rate": 2.6101628134771633e-05, "loss": 3.0513, "step": 1523000 }, { "epoch": 9.57, "learning_rate": 2.609377943263302e-05, "loss": 3.061, "step": 1523500 }, { "epoch": 9.57, "learning_rate": 2.6085930730494406e-05, "loss": 3.0504, "step": 1524000 }, { "epoch": 9.57, "learning_rate": 2.6078082028355794e-05, "loss": 3.0559, "step": 1524500 }, { "epoch": 9.58, "learning_rate": 2.607023332621718e-05, "loss": 3.0573, "step": 1525000 }, { "epoch": 9.58, "learning_rate": 2.606238462407856e-05, "loss": 3.0565, "step": 1525500 }, { "epoch": 9.58, "learning_rate": 2.605453592193995e-05, "loss": 3.0555, "step": 1526000 }, { "epoch": 9.58, "learning_rate": 2.6046687219801337e-05, "loss": 3.0545, "step": 1526500 }, { "epoch": 9.59, "learning_rate": 2.603883851766272e-05, "loss": 3.0522, "step": 1527000 }, { "epoch": 9.59, "learning_rate": 2.6030989815524103e-05, "loss": 3.0517, "step": 1527500 }, { "epoch": 9.59, "learning_rate": 2.602314111338549e-05, "loss": 3.0609, "step": 1528000 }, { "epoch": 9.6, "learning_rate": 2.601529241124688e-05, "loss": 3.0553, "step": 1528500 }, { "epoch": 9.6, "learning_rate": 2.6007443709108264e-05, "loss": 3.0585, "step": 1529000 }, { "epoch": 9.6, "learning_rate": 2.5999595006969652e-05, "loss": 3.0563, "step": 1529500 }, { "epoch": 9.61, "learning_rate": 2.599176200223531e-05, "loss": 3.0585, "step": 1530000 }, { "epoch": 9.61, "eval_accuracy": 0.45206895967280303, "eval_loss": 2.974055051803589, "eval_runtime": 1450.9898, "eval_samples_per_second": 57.754, "eval_steps_per_second": 5.776, "step": 1530000 }, { "epoch": 9.61, "learning_rate": 2.5983913300096695e-05, "loss": 3.0533, "step": 1530500 }, { "epoch": 9.61, "learning_rate": 2.5976064597958083e-05, "loss": 3.0584, "step": 1531000 }, { "epoch": 9.62, "learning_rate": 2.596821589581947e-05, "loss": 3.0562, "step": 1531500 }, { "epoch": 9.62, "learning_rate": 2.5960367193680856e-05, "loss": 3.0576, "step": 1532000 }, { "epoch": 9.62, "learning_rate": 2.5952534188946514e-05, "loss": 3.0501, "step": 1532500 }, { "epoch": 9.63, "learning_rate": 2.5944685486807902e-05, "loss": 3.0535, "step": 1533000 }, { "epoch": 9.63, "learning_rate": 2.593683678466929e-05, "loss": 3.0522, "step": 1533500 }, { "epoch": 9.63, "learning_rate": 2.5928988082530675e-05, "loss": 3.0573, "step": 1534000 }, { "epoch": 9.64, "learning_rate": 2.592115507779634e-05, "loss": 3.0543, "step": 1534500 }, { "epoch": 9.64, "learning_rate": 2.5913337770466277e-05, "loss": 3.0602, "step": 1535000 }, { "epoch": 9.64, "learning_rate": 2.5905489068327666e-05, "loss": 3.0531, "step": 1535500 }, { "epoch": 9.64, "learning_rate": 2.5897640366189047e-05, "loss": 3.0582, "step": 1536000 }, { "epoch": 9.65, "learning_rate": 2.5889791664050435e-05, "loss": 3.0563, "step": 1536500 }, { "epoch": 9.65, "learning_rate": 2.588194296191182e-05, "loss": 3.0624, "step": 1537000 }, { "epoch": 9.65, "learning_rate": 2.5874109957177484e-05, "loss": 3.0523, "step": 1537500 }, { "epoch": 9.66, "learning_rate": 2.586626125503887e-05, "loss": 3.0538, "step": 1538000 }, { "epoch": 9.66, "learning_rate": 2.585841255290025e-05, "loss": 3.054, "step": 1538500 }, { "epoch": 9.66, "learning_rate": 2.585056385076164e-05, "loss": 3.0578, "step": 1539000 }, { "epoch": 9.67, "learning_rate": 2.5842715148623027e-05, "loss": 3.0548, "step": 1539500 }, { "epoch": 9.67, "learning_rate": 2.583486644648441e-05, "loss": 3.0554, "step": 1540000 }, { "epoch": 9.67, "learning_rate": 2.5827017744345793e-05, "loss": 3.0485, "step": 1540500 }, { "epoch": 9.68, "learning_rate": 2.581916904220718e-05, "loss": 3.0541, "step": 1541000 }, { "epoch": 9.68, "learning_rate": 2.581132034006857e-05, "loss": 3.0578, "step": 1541500 }, { "epoch": 9.68, "learning_rate": 2.580348733533423e-05, "loss": 3.0645, "step": 1542000 }, { "epoch": 9.69, "learning_rate": 2.579563863319562e-05, "loss": 3.0569, "step": 1542500 }, { "epoch": 9.69, "learning_rate": 2.5787789931057e-05, "loss": 3.0593, "step": 1543000 }, { "epoch": 9.69, "learning_rate": 2.5779941228918385e-05, "loss": 3.0524, "step": 1543500 }, { "epoch": 9.69, "learning_rate": 2.5772092526779773e-05, "loss": 3.0558, "step": 1544000 }, { "epoch": 9.7, "learning_rate": 2.576424382464116e-05, "loss": 3.0558, "step": 1544500 }, { "epoch": 9.7, "learning_rate": 2.5756395122502546e-05, "loss": 3.0591, "step": 1545000 }, { "epoch": 9.7, "learning_rate": 2.5748562117768204e-05, "loss": 3.0576, "step": 1545500 }, { "epoch": 9.71, "learning_rate": 2.5740713415629592e-05, "loss": 3.0469, "step": 1546000 }, { "epoch": 9.71, "learning_rate": 2.573286471349098e-05, "loss": 3.0524, "step": 1546500 }, { "epoch": 9.71, "learning_rate": 2.5725016011352365e-05, "loss": 3.0541, "step": 1547000 }, { "epoch": 9.72, "learning_rate": 2.5717167309213753e-05, "loss": 3.0542, "step": 1547500 }, { "epoch": 9.72, "learning_rate": 2.5709318607075134e-05, "loss": 3.0568, "step": 1548000 }, { "epoch": 9.72, "learning_rate": 2.570146990493652e-05, "loss": 3.0549, "step": 1548500 }, { "epoch": 9.73, "learning_rate": 2.5693621202797907e-05, "loss": 3.0542, "step": 1549000 }, { "epoch": 9.73, "learning_rate": 2.5685772500659295e-05, "loss": 3.054, "step": 1549500 }, { "epoch": 9.73, "learning_rate": 2.5677939495924957e-05, "loss": 3.0555, "step": 1550000 }, { "epoch": 9.74, "learning_rate": 2.5670090793786338e-05, "loss": 3.0543, "step": 1550500 }, { "epoch": 9.74, "learning_rate": 2.5662242091647726e-05, "loss": 3.0526, "step": 1551000 }, { "epoch": 9.74, "learning_rate": 2.5654393389509114e-05, "loss": 3.0569, "step": 1551500 }, { "epoch": 9.74, "learning_rate": 2.5646560384774776e-05, "loss": 3.0522, "step": 1552000 }, { "epoch": 9.75, "learning_rate": 2.5638711682636164e-05, "loss": 3.0593, "step": 1552500 }, { "epoch": 9.75, "learning_rate": 2.5630862980497545e-05, "loss": 3.052, "step": 1553000 }, { "epoch": 9.75, "learning_rate": 2.562301427835893e-05, "loss": 3.0561, "step": 1553500 }, { "epoch": 9.76, "learning_rate": 2.5615165576220318e-05, "loss": 3.0578, "step": 1554000 }, { "epoch": 9.76, "learning_rate": 2.5607332571485983e-05, "loss": 3.0605, "step": 1554500 }, { "epoch": 9.76, "learning_rate": 2.5599483869347367e-05, "loss": 3.0634, "step": 1555000 }, { "epoch": 9.77, "learning_rate": 2.559163516720875e-05, "loss": 3.0518, "step": 1555500 }, { "epoch": 9.77, "learning_rate": 2.5583786465070137e-05, "loss": 3.0521, "step": 1556000 }, { "epoch": 9.77, "learning_rate": 2.55759534603358e-05, "loss": 3.0578, "step": 1556500 }, { "epoch": 9.78, "learning_rate": 2.5568104758197186e-05, "loss": 3.0551, "step": 1557000 }, { "epoch": 9.78, "learning_rate": 2.556027175346285e-05, "loss": 3.0491, "step": 1557500 }, { "epoch": 9.78, "learning_rate": 2.5552423051324236e-05, "loss": 3.0529, "step": 1558000 }, { "epoch": 9.79, "learning_rate": 2.5544574349185617e-05, "loss": 3.0525, "step": 1558500 }, { "epoch": 9.79, "learning_rate": 2.5536725647047005e-05, "loss": 3.057, "step": 1559000 }, { "epoch": 9.79, "learning_rate": 2.5528892642312667e-05, "loss": 3.0559, "step": 1559500 }, { "epoch": 9.8, "learning_rate": 2.5521043940174055e-05, "loss": 3.0552, "step": 1560000 }, { "epoch": 9.8, "eval_accuracy": 0.4522988017637978, "eval_loss": 2.9718852043151855, "eval_runtime": 1449.1488, "eval_samples_per_second": 57.828, "eval_steps_per_second": 5.783, "step": 1560000 }, { "epoch": 9.8, "learning_rate": 2.5513195238035443e-05, "loss": 3.0535, "step": 1560500 }, { "epoch": 9.8, "learning_rate": 2.5505346535896824e-05, "loss": 3.0509, "step": 1561000 }, { "epoch": 9.8, "learning_rate": 2.549749783375821e-05, "loss": 3.0585, "step": 1561500 }, { "epoch": 9.81, "learning_rate": 2.5489649131619597e-05, "loss": 3.05, "step": 1562000 }, { "epoch": 9.81, "learning_rate": 2.5481800429480985e-05, "loss": 3.0607, "step": 1562500 }, { "epoch": 9.81, "learning_rate": 2.5473951727342367e-05, "loss": 3.0529, "step": 1563000 }, { "epoch": 9.82, "learning_rate": 2.5466118722608028e-05, "loss": 3.0558, "step": 1563500 }, { "epoch": 9.82, "learning_rate": 2.5458270020469416e-05, "loss": 3.0583, "step": 1564000 }, { "epoch": 9.82, "learning_rate": 2.54504213183308e-05, "loss": 3.0492, "step": 1564500 }, { "epoch": 9.83, "learning_rate": 2.544257261619219e-05, "loss": 3.0484, "step": 1565000 }, { "epoch": 9.83, "learning_rate": 2.543472391405357e-05, "loss": 3.0515, "step": 1565500 }, { "epoch": 9.83, "learning_rate": 2.542687521191496e-05, "loss": 3.0532, "step": 1566000 }, { "epoch": 9.84, "learning_rate": 2.5419026509776343e-05, "loss": 3.0601, "step": 1566500 }, { "epoch": 9.84, "learning_rate": 2.541117780763773e-05, "loss": 3.0597, "step": 1567000 }, { "epoch": 9.84, "learning_rate": 2.5403344802903396e-05, "loss": 3.0531, "step": 1567500 }, { "epoch": 9.85, "learning_rate": 2.5395496100764778e-05, "loss": 3.0521, "step": 1568000 }, { "epoch": 9.85, "learning_rate": 2.5387647398626162e-05, "loss": 3.0558, "step": 1568500 }, { "epoch": 9.85, "learning_rate": 2.537979869648755e-05, "loss": 3.0527, "step": 1569000 }, { "epoch": 9.85, "learning_rate": 2.537194999434894e-05, "loss": 3.0496, "step": 1569500 }, { "epoch": 9.86, "learning_rate": 2.53641169896146e-05, "loss": 3.0566, "step": 1570000 }, { "epoch": 9.86, "learning_rate": 2.535626828747598e-05, "loss": 3.057, "step": 1570500 }, { "epoch": 9.86, "learning_rate": 2.534841958533737e-05, "loss": 3.0556, "step": 1571000 }, { "epoch": 9.87, "learning_rate": 2.5340570883198754e-05, "loss": 3.056, "step": 1571500 }, { "epoch": 9.87, "learning_rate": 2.5332722181060142e-05, "loss": 3.0461, "step": 1572000 }, { "epoch": 9.87, "learning_rate": 2.532487347892153e-05, "loss": 3.0466, "step": 1572500 }, { "epoch": 9.88, "learning_rate": 2.531704047418719e-05, "loss": 3.0557, "step": 1573000 }, { "epoch": 9.88, "learning_rate": 2.5309191772048573e-05, "loss": 3.0539, "step": 1573500 }, { "epoch": 9.88, "learning_rate": 2.5301358767314238e-05, "loss": 3.0513, "step": 1574000 }, { "epoch": 9.89, "learning_rate": 2.5293510065175623e-05, "loss": 3.0499, "step": 1574500 }, { "epoch": 9.89, "learning_rate": 2.528566136303701e-05, "loss": 3.0479, "step": 1575000 }, { "epoch": 9.89, "learning_rate": 2.5277828358302676e-05, "loss": 3.0517, "step": 1575500 }, { "epoch": 9.9, "learning_rate": 2.5269979656164057e-05, "loss": 3.0541, "step": 1576000 }, { "epoch": 9.9, "learning_rate": 2.526213095402544e-05, "loss": 3.0532, "step": 1576500 }, { "epoch": 9.9, "learning_rate": 2.525428225188683e-05, "loss": 3.0534, "step": 1577000 }, { "epoch": 9.91, "learning_rate": 2.524644924715249e-05, "loss": 3.0526, "step": 1577500 }, { "epoch": 9.91, "learning_rate": 2.523860054501388e-05, "loss": 3.0546, "step": 1578000 }, { "epoch": 9.91, "learning_rate": 2.523075184287526e-05, "loss": 3.0541, "step": 1578500 }, { "epoch": 9.91, "learning_rate": 2.522290314073665e-05, "loss": 3.0589, "step": 1579000 }, { "epoch": 9.92, "learning_rate": 2.5215054438598034e-05, "loss": 3.049, "step": 1579500 }, { "epoch": 9.92, "learning_rate": 2.520720573645942e-05, "loss": 3.0519, "step": 1580000 }, { "epoch": 9.92, "learning_rate": 2.519935703432081e-05, "loss": 3.0481, "step": 1580500 }, { "epoch": 9.93, "learning_rate": 2.519150833218219e-05, "loss": 3.0499, "step": 1581000 }, { "epoch": 9.93, "learning_rate": 2.5183659630043576e-05, "loss": 3.0479, "step": 1581500 }, { "epoch": 9.93, "learning_rate": 2.5175810927904964e-05, "loss": 3.0547, "step": 1582000 }, { "epoch": 9.94, "learning_rate": 2.5167962225766352e-05, "loss": 3.0508, "step": 1582500 }, { "epoch": 9.94, "learning_rate": 2.5160113523627733e-05, "loss": 3.0565, "step": 1583000 }, { "epoch": 9.94, "learning_rate": 2.5152280518893395e-05, "loss": 3.0531, "step": 1583500 }, { "epoch": 9.95, "learning_rate": 2.5144431816754783e-05, "loss": 3.0534, "step": 1584000 }, { "epoch": 9.95, "learning_rate": 2.5136598812020444e-05, "loss": 3.0606, "step": 1584500 }, { "epoch": 9.95, "learning_rate": 2.5128750109881832e-05, "loss": 3.0521, "step": 1585000 }, { "epoch": 9.96, "learning_rate": 2.512090140774322e-05, "loss": 3.0492, "step": 1585500 }, { "epoch": 9.96, "learning_rate": 2.5113052705604602e-05, "loss": 3.0475, "step": 1586000 }, { "epoch": 9.96, "learning_rate": 2.5105219700870263e-05, "loss": 3.0535, "step": 1586500 }, { "epoch": 9.96, "learning_rate": 2.509737099873165e-05, "loss": 3.0512, "step": 1587000 }, { "epoch": 9.97, "learning_rate": 2.5089522296593036e-05, "loss": 3.0601, "step": 1587500 }, { "epoch": 9.97, "learning_rate": 2.50816892918587e-05, "loss": 3.0519, "step": 1588000 }, { "epoch": 9.97, "learning_rate": 2.5073840589720082e-05, "loss": 3.0466, "step": 1588500 }, { "epoch": 9.98, "learning_rate": 2.506599188758147e-05, "loss": 3.0516, "step": 1589000 }, { "epoch": 9.98, "learning_rate": 2.5058143185442855e-05, "loss": 3.0537, "step": 1589500 }, { "epoch": 9.98, "learning_rate": 2.5050294483304243e-05, "loss": 3.0479, "step": 1590000 }, { "epoch": 9.98, "eval_accuracy": 0.4526354812335694, "eval_loss": 2.969694137573242, "eval_runtime": 1449.799, "eval_samples_per_second": 57.802, "eval_steps_per_second": 5.781, "step": 1590000 }, { "epoch": 9.99, "learning_rate": 2.504244578116563e-05, "loss": 3.0517, "step": 1590500 }, { "epoch": 9.99, "learning_rate": 2.5034612776431293e-05, "loss": 3.0604, "step": 1591000 }, { "epoch": 9.99, "learning_rate": 2.5026764074292674e-05, "loss": 3.0547, "step": 1591500 }, { "epoch": 10.0, "learning_rate": 2.5018915372154062e-05, "loss": 3.0495, "step": 1592000 }, { "epoch": 10.0, "learning_rate": 2.5011066670015447e-05, "loss": 3.0552, "step": 1592500 }, { "epoch": 10.0, "learning_rate": 2.5003233665281112e-05, "loss": 3.0386, "step": 1593000 }, { "epoch": 10.01, "learning_rate": 2.4995384963142497e-05, "loss": 3.0336, "step": 1593500 }, { "epoch": 10.01, "learning_rate": 2.498753626100388e-05, "loss": 3.0341, "step": 1594000 }, { "epoch": 10.01, "learning_rate": 2.4979703256269543e-05, "loss": 3.0339, "step": 1594500 }, { "epoch": 10.01, "learning_rate": 2.497185455413093e-05, "loss": 3.03, "step": 1595000 }, { "epoch": 10.02, "learning_rate": 2.4964005851992316e-05, "loss": 3.0307, "step": 1595500 }, { "epoch": 10.02, "learning_rate": 2.49561571498537e-05, "loss": 3.0321, "step": 1596000 }, { "epoch": 10.02, "learning_rate": 2.494830844771509e-05, "loss": 3.0414, "step": 1596500 }, { "epoch": 10.03, "learning_rate": 2.4940459745576473e-05, "loss": 3.0438, "step": 1597000 }, { "epoch": 10.03, "learning_rate": 2.4932611043437858e-05, "loss": 3.0354, "step": 1597500 }, { "epoch": 10.03, "learning_rate": 2.4924762341299243e-05, "loss": 3.0346, "step": 1598000 }, { "epoch": 10.04, "learning_rate": 2.491691363916063e-05, "loss": 3.0422, "step": 1598500 }, { "epoch": 10.04, "learning_rate": 2.4909064937022016e-05, "loss": 3.0416, "step": 1599000 }, { "epoch": 10.04, "learning_rate": 2.49012162348834e-05, "loss": 3.0307, "step": 1599500 }, { "epoch": 10.05, "learning_rate": 2.4893367532744785e-05, "loss": 3.0333, "step": 1600000 }, { "epoch": 10.05, "learning_rate": 2.4885518830606173e-05, "loss": 3.0315, "step": 1600500 }, { "epoch": 10.05, "learning_rate": 2.4877670128467558e-05, "loss": 3.0362, "step": 1601000 }, { "epoch": 10.06, "learning_rate": 2.4869821426328943e-05, "loss": 3.0373, "step": 1601500 }, { "epoch": 10.06, "learning_rate": 2.486197272419033e-05, "loss": 3.0389, "step": 1602000 }, { "epoch": 10.06, "learning_rate": 2.4854124022051715e-05, "loss": 3.036, "step": 1602500 }, { "epoch": 10.07, "learning_rate": 2.48462753199131e-05, "loss": 3.0402, "step": 1603000 }, { "epoch": 10.07, "learning_rate": 2.4838426617774485e-05, "loss": 3.0411, "step": 1603500 }, { "epoch": 10.07, "learning_rate": 2.4830577915635873e-05, "loss": 3.0446, "step": 1604000 }, { "epoch": 10.07, "learning_rate": 2.4822729213497254e-05, "loss": 3.0309, "step": 1604500 }, { "epoch": 10.08, "learning_rate": 2.4814880511358643e-05, "loss": 3.0407, "step": 1605000 }, { "epoch": 10.08, "learning_rate": 2.480703180922003e-05, "loss": 3.0317, "step": 1605500 }, { "epoch": 10.08, "learning_rate": 2.4799198804485692e-05, "loss": 3.0322, "step": 1606000 }, { "epoch": 10.09, "learning_rate": 2.4791350102347077e-05, "loss": 3.0377, "step": 1606500 }, { "epoch": 10.09, "learning_rate": 2.478351709761274e-05, "loss": 3.0477, "step": 1607000 }, { "epoch": 10.09, "learning_rate": 2.4775684092878403e-05, "loss": 3.0368, "step": 1607500 }, { "epoch": 10.1, "learning_rate": 2.4767835390739788e-05, "loss": 3.0332, "step": 1608000 }, { "epoch": 10.1, "learning_rate": 2.4759986688601176e-05, "loss": 3.0331, "step": 1608500 }, { "epoch": 10.1, "learning_rate": 2.475213798646256e-05, "loss": 3.0357, "step": 1609000 }, { "epoch": 10.11, "learning_rate": 2.4744289284323945e-05, "loss": 3.0392, "step": 1609500 }, { "epoch": 10.11, "learning_rate": 2.473644058218533e-05, "loss": 3.0423, "step": 1610000 }, { "epoch": 10.11, "learning_rate": 2.4728591880046718e-05, "loss": 3.0425, "step": 1610500 }, { "epoch": 10.12, "learning_rate": 2.47207431779081e-05, "loss": 3.0387, "step": 1611000 }, { "epoch": 10.12, "learning_rate": 2.4712894475769488e-05, "loss": 3.0368, "step": 1611500 }, { "epoch": 10.12, "learning_rate": 2.4705045773630876e-05, "loss": 3.0395, "step": 1612000 }, { "epoch": 10.12, "learning_rate": 2.469719707149226e-05, "loss": 3.0364, "step": 1612500 }, { "epoch": 10.13, "learning_rate": 2.4689348369353645e-05, "loss": 3.0458, "step": 1613000 }, { "epoch": 10.13, "learning_rate": 2.468149966721503e-05, "loss": 3.0415, "step": 1613500 }, { "epoch": 10.13, "learning_rate": 2.4673666662480695e-05, "loss": 3.0425, "step": 1614000 }, { "epoch": 10.14, "learning_rate": 2.466581796034208e-05, "loss": 3.0364, "step": 1614500 }, { "epoch": 10.14, "learning_rate": 2.4657969258203464e-05, "loss": 3.0394, "step": 1615000 }, { "epoch": 10.14, "learning_rate": 2.4650120556064852e-05, "loss": 3.0418, "step": 1615500 }, { "epoch": 10.15, "learning_rate": 2.4642271853926234e-05, "loss": 3.0338, "step": 1616000 }, { "epoch": 10.15, "learning_rate": 2.46344388491919e-05, "loss": 3.0464, "step": 1616500 }, { "epoch": 10.15, "learning_rate": 2.4626590147053287e-05, "loss": 3.0419, "step": 1617000 }, { "epoch": 10.16, "learning_rate": 2.461874144491467e-05, "loss": 3.0469, "step": 1617500 }, { "epoch": 10.16, "learning_rate": 2.4610892742776056e-05, "loss": 3.0383, "step": 1618000 }, { "epoch": 10.16, "learning_rate": 2.460305973804172e-05, "loss": 3.046, "step": 1618500 }, { "epoch": 10.17, "learning_rate": 2.4595211035903106e-05, "loss": 3.0422, "step": 1619000 }, { "epoch": 10.17, "learning_rate": 2.458736233376449e-05, "loss": 3.0395, "step": 1619500 }, { "epoch": 10.17, "learning_rate": 2.4579513631625875e-05, "loss": 3.0402, "step": 1620000 }, { "epoch": 10.17, "eval_accuracy": 0.45282602480626527, "eval_loss": 2.9689009189605713, "eval_runtime": 1450.432, "eval_samples_per_second": 57.777, "eval_steps_per_second": 5.778, "step": 1620000 }, { "epoch": 10.18, "learning_rate": 2.457168062689154e-05, "loss": 3.043, "step": 1620500 }, { "epoch": 10.18, "learning_rate": 2.4563831924752925e-05, "loss": 3.042, "step": 1621000 }, { "epoch": 10.18, "learning_rate": 2.455598322261431e-05, "loss": 3.0438, "step": 1621500 }, { "epoch": 10.18, "learning_rate": 2.4548134520475697e-05, "loss": 3.0401, "step": 1622000 }, { "epoch": 10.19, "learning_rate": 2.454028581833708e-05, "loss": 3.0423, "step": 1622500 }, { "epoch": 10.19, "learning_rate": 2.4532437116198467e-05, "loss": 3.0413, "step": 1623000 }, { "epoch": 10.19, "learning_rate": 2.452458841405985e-05, "loss": 3.0404, "step": 1623500 }, { "epoch": 10.2, "learning_rate": 2.451673971192124e-05, "loss": 3.0435, "step": 1624000 }, { "epoch": 10.2, "learning_rate": 2.450889100978262e-05, "loss": 3.044, "step": 1624500 }, { "epoch": 10.2, "learning_rate": 2.450104230764401e-05, "loss": 3.0379, "step": 1625000 }, { "epoch": 10.21, "learning_rate": 2.4493193605505394e-05, "loss": 3.0495, "step": 1625500 }, { "epoch": 10.21, "learning_rate": 2.448534490336678e-05, "loss": 3.0413, "step": 1626000 }, { "epoch": 10.21, "learning_rate": 2.4477496201228167e-05, "loss": 3.0391, "step": 1626500 }, { "epoch": 10.22, "learning_rate": 2.4469663196493828e-05, "loss": 3.0425, "step": 1627000 }, { "epoch": 10.22, "learning_rate": 2.4461814494355213e-05, "loss": 3.0445, "step": 1627500 }, { "epoch": 10.22, "learning_rate": 2.44539657922166e-05, "loss": 3.041, "step": 1628000 }, { "epoch": 10.23, "learning_rate": 2.4446117090077986e-05, "loss": 3.0394, "step": 1628500 }, { "epoch": 10.23, "learning_rate": 2.4438268387939374e-05, "loss": 3.0417, "step": 1629000 }, { "epoch": 10.23, "learning_rate": 2.4430419685800755e-05, "loss": 3.0365, "step": 1629500 }, { "epoch": 10.23, "learning_rate": 2.442258668106642e-05, "loss": 3.0415, "step": 1630000 }, { "epoch": 10.24, "learning_rate": 2.4414737978927808e-05, "loss": 3.0398, "step": 1630500 }, { "epoch": 10.24, "learning_rate": 2.4406904974193466e-05, "loss": 3.0386, "step": 1631000 }, { "epoch": 10.24, "learning_rate": 2.4399056272054854e-05, "loss": 3.0505, "step": 1631500 }, { "epoch": 10.25, "learning_rate": 2.439120756991624e-05, "loss": 3.0415, "step": 1632000 }, { "epoch": 10.25, "learning_rate": 2.4383358867777624e-05, "loss": 3.0434, "step": 1632500 }, { "epoch": 10.25, "learning_rate": 2.4375510165639012e-05, "loss": 3.044, "step": 1633000 }, { "epoch": 10.26, "learning_rate": 2.4367661463500397e-05, "loss": 3.0357, "step": 1633500 }, { "epoch": 10.26, "learning_rate": 2.4359812761361785e-05, "loss": 3.0394, "step": 1634000 }, { "epoch": 10.26, "learning_rate": 2.4351979756627446e-05, "loss": 3.0469, "step": 1634500 }, { "epoch": 10.27, "learning_rate": 2.434413105448883e-05, "loss": 3.0463, "step": 1635000 }, { "epoch": 10.27, "learning_rate": 2.433628235235022e-05, "loss": 3.0443, "step": 1635500 }, { "epoch": 10.27, "learning_rate": 2.43284336502116e-05, "loss": 3.0431, "step": 1636000 }, { "epoch": 10.28, "learning_rate": 2.432058494807299e-05, "loss": 3.0485, "step": 1636500 }, { "epoch": 10.28, "learning_rate": 2.4312736245934373e-05, "loss": 3.0402, "step": 1637000 }, { "epoch": 10.28, "learning_rate": 2.4304887543795758e-05, "loss": 3.0411, "step": 1637500 }, { "epoch": 10.28, "learning_rate": 2.4297054539061423e-05, "loss": 3.0384, "step": 1638000 }, { "epoch": 10.29, "learning_rate": 2.4289205836922808e-05, "loss": 3.0404, "step": 1638500 }, { "epoch": 10.29, "learning_rate": 2.4281357134784192e-05, "loss": 3.0402, "step": 1639000 }, { "epoch": 10.29, "learning_rate": 2.4273508432645577e-05, "loss": 3.0408, "step": 1639500 }, { "epoch": 10.3, "learning_rate": 2.4265659730506965e-05, "loss": 3.0354, "step": 1640000 }, { "epoch": 10.3, "learning_rate": 2.425781102836835e-05, "loss": 3.0454, "step": 1640500 }, { "epoch": 10.3, "learning_rate": 2.4249962326229735e-05, "loss": 3.0414, "step": 1641000 }, { "epoch": 10.31, "learning_rate": 2.424211362409112e-05, "loss": 3.0449, "step": 1641500 }, { "epoch": 10.31, "learning_rate": 2.4234264921952508e-05, "loss": 3.0423, "step": 1642000 }, { "epoch": 10.31, "learning_rate": 2.4226416219813892e-05, "loss": 3.0387, "step": 1642500 }, { "epoch": 10.32, "learning_rate": 2.4218567517675277e-05, "loss": 3.041, "step": 1643000 }, { "epoch": 10.32, "learning_rate": 2.4210718815536665e-05, "loss": 3.0478, "step": 1643500 }, { "epoch": 10.32, "learning_rate": 2.420287011339805e-05, "loss": 3.0486, "step": 1644000 }, { "epoch": 10.33, "learning_rate": 2.419503710866371e-05, "loss": 3.0464, "step": 1644500 }, { "epoch": 10.33, "learning_rate": 2.41871884065251e-05, "loss": 3.0414, "step": 1645000 }, { "epoch": 10.33, "learning_rate": 2.4179339704386484e-05, "loss": 3.0448, "step": 1645500 }, { "epoch": 10.34, "learning_rate": 2.417149100224787e-05, "loss": 3.0469, "step": 1646000 }, { "epoch": 10.34, "learning_rate": 2.416367369491781e-05, "loss": 3.0419, "step": 1646500 }, { "epoch": 10.34, "learning_rate": 2.4155824992779195e-05, "loss": 3.0432, "step": 1647000 }, { "epoch": 10.34, "learning_rate": 2.414797629064058e-05, "loss": 3.0384, "step": 1647500 }, { "epoch": 10.35, "learning_rate": 2.4140127588501964e-05, "loss": 3.0372, "step": 1648000 }, { "epoch": 10.35, "learning_rate": 2.4132278886363353e-05, "loss": 3.0417, "step": 1648500 }, { "epoch": 10.35, "learning_rate": 2.4124430184224737e-05, "loss": 3.0387, "step": 1649000 }, { "epoch": 10.36, "learning_rate": 2.41165971794904e-05, "loss": 3.0408, "step": 1649500 }, { "epoch": 10.36, "learning_rate": 2.4108748477351787e-05, "loss": 3.0489, "step": 1650000 }, { "epoch": 10.36, "eval_accuracy": 0.4531396080854071, "eval_loss": 2.96740984916687, "eval_runtime": 1449.8612, "eval_samples_per_second": 57.799, "eval_steps_per_second": 5.781, "step": 1650000 }, { "epoch": 10.36, "learning_rate": 2.410089977521317e-05, "loss": 3.0381, "step": 1650500 }, { "epoch": 10.37, "learning_rate": 2.4093051073074556e-05, "loss": 3.0404, "step": 1651000 }, { "epoch": 10.37, "learning_rate": 2.4085202370935944e-05, "loss": 3.0413, "step": 1651500 }, { "epoch": 10.37, "learning_rate": 2.407735366879733e-05, "loss": 3.0425, "step": 1652000 }, { "epoch": 10.38, "learning_rate": 2.4069504966658714e-05, "loss": 3.0433, "step": 1652500 }, { "epoch": 10.38, "learning_rate": 2.40616562645201e-05, "loss": 3.0436, "step": 1653000 }, { "epoch": 10.38, "learning_rate": 2.405383895719004e-05, "loss": 3.0433, "step": 1653500 }, { "epoch": 10.39, "learning_rate": 2.4045990255051425e-05, "loss": 3.0468, "step": 1654000 }, { "epoch": 10.39, "learning_rate": 2.403814155291281e-05, "loss": 3.043, "step": 1654500 }, { "epoch": 10.39, "learning_rate": 2.4030292850774198e-05, "loss": 3.0447, "step": 1655000 }, { "epoch": 10.39, "learning_rate": 2.402245984603986e-05, "loss": 3.0398, "step": 1655500 }, { "epoch": 10.4, "learning_rate": 2.4014611143901244e-05, "loss": 3.045, "step": 1656000 }, { "epoch": 10.4, "learning_rate": 2.4006762441762632e-05, "loss": 3.0469, "step": 1656500 }, { "epoch": 10.4, "learning_rate": 2.3998913739624017e-05, "loss": 3.0417, "step": 1657000 }, { "epoch": 10.41, "learning_rate": 2.39910650374854e-05, "loss": 3.0419, "step": 1657500 }, { "epoch": 10.41, "learning_rate": 2.3983232032751066e-05, "loss": 3.0388, "step": 1658000 }, { "epoch": 10.41, "learning_rate": 2.397538333061245e-05, "loss": 3.0412, "step": 1658500 }, { "epoch": 10.42, "learning_rate": 2.3967534628473836e-05, "loss": 3.0402, "step": 1659000 }, { "epoch": 10.42, "learning_rate": 2.3959685926335224e-05, "loss": 3.0409, "step": 1659500 }, { "epoch": 10.42, "learning_rate": 2.3951852921600885e-05, "loss": 3.048, "step": 1660000 }, { "epoch": 10.43, "learning_rate": 2.394400421946227e-05, "loss": 3.0368, "step": 1660500 }, { "epoch": 10.43, "learning_rate": 2.3936155517323655e-05, "loss": 3.043, "step": 1661000 }, { "epoch": 10.43, "learning_rate": 2.392832251258932e-05, "loss": 3.0375, "step": 1661500 }, { "epoch": 10.44, "learning_rate": 2.392048950785498e-05, "loss": 3.0454, "step": 1662000 }, { "epoch": 10.44, "learning_rate": 2.391264080571637e-05, "loss": 3.0465, "step": 1662500 }, { "epoch": 10.44, "learning_rate": 2.3904792103577754e-05, "loss": 3.0496, "step": 1663000 }, { "epoch": 10.45, "learning_rate": 2.389694340143914e-05, "loss": 3.0473, "step": 1663500 }, { "epoch": 10.45, "learning_rate": 2.3889110396704803e-05, "loss": 3.0421, "step": 1664000 }, { "epoch": 10.45, "learning_rate": 2.3881261694566188e-05, "loss": 3.0447, "step": 1664500 }, { "epoch": 10.45, "learning_rate": 2.3873412992427573e-05, "loss": 3.0464, "step": 1665000 }, { "epoch": 10.46, "learning_rate": 2.3865564290288957e-05, "loss": 3.0457, "step": 1665500 }, { "epoch": 10.46, "learning_rate": 2.3857715588150345e-05, "loss": 3.0435, "step": 1666000 }, { "epoch": 10.46, "learning_rate": 2.384986688601173e-05, "loss": 3.0395, "step": 1666500 }, { "epoch": 10.47, "learning_rate": 2.3842018183873115e-05, "loss": 3.0405, "step": 1667000 }, { "epoch": 10.47, "learning_rate": 2.38341694817345e-05, "loss": 3.0433, "step": 1667500 }, { "epoch": 10.47, "learning_rate": 2.3826336477000164e-05, "loss": 3.0422, "step": 1668000 }, { "epoch": 10.48, "learning_rate": 2.381848777486155e-05, "loss": 3.0464, "step": 1668500 }, { "epoch": 10.48, "learning_rate": 2.3810639072722934e-05, "loss": 3.0357, "step": 1669000 }, { "epoch": 10.48, "learning_rate": 2.3802790370584322e-05, "loss": 3.0429, "step": 1669500 }, { "epoch": 10.49, "learning_rate": 2.3794941668445707e-05, "loss": 3.0389, "step": 1670000 }, { "epoch": 10.49, "learning_rate": 2.378709296630709e-05, "loss": 3.038, "step": 1670500 }, { "epoch": 10.49, "learning_rate": 2.377924426416848e-05, "loss": 3.042, "step": 1671000 }, { "epoch": 10.5, "learning_rate": 2.3771395562029864e-05, "loss": 3.0492, "step": 1671500 }, { "epoch": 10.5, "learning_rate": 2.376354685989125e-05, "loss": 3.05, "step": 1672000 }, { "epoch": 10.5, "learning_rate": 2.3755698157752634e-05, "loss": 3.0419, "step": 1672500 }, { "epoch": 10.5, "learning_rate": 2.37478651530183e-05, "loss": 3.0424, "step": 1673000 }, { "epoch": 10.51, "learning_rate": 2.3740016450879683e-05, "loss": 3.0421, "step": 1673500 }, { "epoch": 10.51, "learning_rate": 2.3732167748741068e-05, "loss": 3.0428, "step": 1674000 }, { "epoch": 10.51, "learning_rate": 2.3724319046602456e-05, "loss": 3.0418, "step": 1674500 }, { "epoch": 10.52, "learning_rate": 2.3716486041868118e-05, "loss": 3.0451, "step": 1675000 }, { "epoch": 10.52, "learning_rate": 2.3708637339729502e-05, "loss": 3.0444, "step": 1675500 }, { "epoch": 10.52, "learning_rate": 2.370078863759089e-05, "loss": 3.0429, "step": 1676000 }, { "epoch": 10.53, "learning_rate": 2.3692939935452275e-05, "loss": 3.0361, "step": 1676500 }, { "epoch": 10.53, "learning_rate": 2.368509123331366e-05, "loss": 3.0506, "step": 1677000 }, { "epoch": 10.53, "learning_rate": 2.3677242531175045e-05, "loss": 3.0388, "step": 1677500 }, { "epoch": 10.54, "learning_rate": 2.366940952644071e-05, "loss": 3.0474, "step": 1678000 }, { "epoch": 10.54, "learning_rate": 2.3661560824302094e-05, "loss": 3.0495, "step": 1678500 }, { "epoch": 10.54, "learning_rate": 2.365371212216348e-05, "loss": 3.046, "step": 1679000 }, { "epoch": 10.55, "learning_rate": 2.3645863420024867e-05, "loss": 3.0425, "step": 1679500 }, { "epoch": 10.55, "learning_rate": 2.3638014717886252e-05, "loss": 3.0421, "step": 1680000 }, { "epoch": 10.55, "eval_accuracy": 0.45337234303260193, "eval_loss": 2.9639856815338135, "eval_runtime": 1448.8654, "eval_samples_per_second": 57.839, "eval_steps_per_second": 5.785, "step": 1680000 }, { "epoch": 10.55, "learning_rate": 2.3630166015747637e-05, "loss": 3.0407, "step": 1680500 }, { "epoch": 10.55, "learning_rate": 2.36223330110133e-05, "loss": 3.0463, "step": 1681000 }, { "epoch": 10.56, "learning_rate": 2.3614484308874686e-05, "loss": 3.0458, "step": 1681500 }, { "epoch": 10.56, "learning_rate": 2.360663560673607e-05, "loss": 3.0422, "step": 1682000 }, { "epoch": 10.56, "learning_rate": 2.3598786904597456e-05, "loss": 3.0457, "step": 1682500 }, { "epoch": 10.57, "learning_rate": 2.359095389986312e-05, "loss": 3.0514, "step": 1683000 }, { "epoch": 10.57, "learning_rate": 2.3583105197724505e-05, "loss": 3.0462, "step": 1683500 }, { "epoch": 10.57, "learning_rate": 2.357525649558589e-05, "loss": 3.043, "step": 1684000 }, { "epoch": 10.58, "learning_rate": 2.3567407793447278e-05, "loss": 3.0449, "step": 1684500 }, { "epoch": 10.58, "learning_rate": 2.3559559091308663e-05, "loss": 3.038, "step": 1685000 }, { "epoch": 10.58, "learning_rate": 2.3551710389170047e-05, "loss": 3.0356, "step": 1685500 }, { "epoch": 10.59, "learning_rate": 2.3543861687031436e-05, "loss": 3.0439, "step": 1686000 }, { "epoch": 10.59, "learning_rate": 2.353601298489282e-05, "loss": 3.0405, "step": 1686500 }, { "epoch": 10.59, "learning_rate": 2.3528164282754205e-05, "loss": 3.0456, "step": 1687000 }, { "epoch": 10.6, "learning_rate": 2.352031558061559e-05, "loss": 3.0388, "step": 1687500 }, { "epoch": 10.6, "learning_rate": 2.3512466878476978e-05, "loss": 3.0436, "step": 1688000 }, { "epoch": 10.6, "learning_rate": 2.3504618176338363e-05, "loss": 3.0396, "step": 1688500 }, { "epoch": 10.61, "learning_rate": 2.3496769474199747e-05, "loss": 3.0345, "step": 1689000 }, { "epoch": 10.61, "learning_rate": 2.3488920772061132e-05, "loss": 3.0434, "step": 1689500 }, { "epoch": 10.61, "learning_rate": 2.348107206992252e-05, "loss": 3.0484, "step": 1690000 }, { "epoch": 10.61, "learning_rate": 2.34732233677839e-05, "loss": 3.0399, "step": 1690500 }, { "epoch": 10.62, "learning_rate": 2.3465390363049566e-05, "loss": 3.0441, "step": 1691000 }, { "epoch": 10.62, "learning_rate": 2.3457541660910955e-05, "loss": 3.0469, "step": 1691500 }, { "epoch": 10.62, "learning_rate": 2.3449708656176616e-05, "loss": 3.0486, "step": 1692000 }, { "epoch": 10.63, "learning_rate": 2.3441859954038e-05, "loss": 3.0401, "step": 1692500 }, { "epoch": 10.63, "learning_rate": 2.343401125189939e-05, "loss": 3.039, "step": 1693000 }, { "epoch": 10.63, "learning_rate": 2.342616254976077e-05, "loss": 3.0469, "step": 1693500 }, { "epoch": 10.64, "learning_rate": 2.3418329545026435e-05, "loss": 3.0448, "step": 1694000 }, { "epoch": 10.64, "learning_rate": 2.3410480842887823e-05, "loss": 3.0415, "step": 1694500 }, { "epoch": 10.64, "learning_rate": 2.3402632140749208e-05, "loss": 3.0432, "step": 1695000 }, { "epoch": 10.65, "learning_rate": 2.3394783438610592e-05, "loss": 3.0407, "step": 1695500 }, { "epoch": 10.65, "learning_rate": 2.3386934736471977e-05, "loss": 3.0446, "step": 1696000 }, { "epoch": 10.65, "learning_rate": 2.3379086034333365e-05, "loss": 3.0366, "step": 1696500 }, { "epoch": 10.66, "learning_rate": 2.3371237332194747e-05, "loss": 3.0367, "step": 1697000 }, { "epoch": 10.66, "learning_rate": 2.336342002486469e-05, "loss": 3.0402, "step": 1697500 }, { "epoch": 10.66, "learning_rate": 2.3355571322726076e-05, "loss": 3.0372, "step": 1698000 }, { "epoch": 10.66, "learning_rate": 2.334772262058746e-05, "loss": 3.045, "step": 1698500 }, { "epoch": 10.67, "learning_rate": 2.3339873918448846e-05, "loss": 3.042, "step": 1699000 }, { "epoch": 10.67, "learning_rate": 2.3332025216310234e-05, "loss": 3.0377, "step": 1699500 }, { "epoch": 10.67, "learning_rate": 2.3324176514171615e-05, "loss": 3.0482, "step": 1700000 }, { "epoch": 10.68, "learning_rate": 2.3316327812033003e-05, "loss": 3.0476, "step": 1700500 }, { "epoch": 10.68, "learning_rate": 2.3308479109894388e-05, "loss": 3.0437, "step": 1701000 }, { "epoch": 10.68, "learning_rate": 2.3300630407755776e-05, "loss": 3.0436, "step": 1701500 }, { "epoch": 10.69, "learning_rate": 2.329278170561716e-05, "loss": 3.0383, "step": 1702000 }, { "epoch": 10.69, "learning_rate": 2.3284933003478546e-05, "loss": 3.0372, "step": 1702500 }, { "epoch": 10.69, "learning_rate": 2.3277084301339934e-05, "loss": 3.0414, "step": 1703000 }, { "epoch": 10.7, "learning_rate": 2.3269251296605592e-05, "loss": 3.0448, "step": 1703500 }, { "epoch": 10.7, "learning_rate": 2.326140259446698e-05, "loss": 3.0395, "step": 1704000 }, { "epoch": 10.7, "learning_rate": 2.3253553892328368e-05, "loss": 3.0463, "step": 1704500 }, { "epoch": 10.71, "learning_rate": 2.324570519018975e-05, "loss": 3.0463, "step": 1705000 }, { "epoch": 10.71, "learning_rate": 2.3237856488051138e-05, "loss": 3.0437, "step": 1705500 }, { "epoch": 10.71, "learning_rate": 2.3230007785912522e-05, "loss": 3.0488, "step": 1706000 }, { "epoch": 10.72, "learning_rate": 2.322215908377391e-05, "loss": 3.0397, "step": 1706500 }, { "epoch": 10.72, "learning_rate": 2.3214310381635292e-05, "loss": 3.0416, "step": 1707000 }, { "epoch": 10.72, "learning_rate": 2.3206477376900957e-05, "loss": 3.0357, "step": 1707500 }, { "epoch": 10.72, "learning_rate": 2.319864437216662e-05, "loss": 3.0341, "step": 1708000 }, { "epoch": 10.73, "learning_rate": 2.3190795670028006e-05, "loss": 3.0427, "step": 1708500 }, { "epoch": 10.73, "learning_rate": 2.318294696788939e-05, "loss": 3.0468, "step": 1709000 }, { "epoch": 10.73, "learning_rate": 2.3175113963155056e-05, "loss": 3.0474, "step": 1709500 }, { "epoch": 10.74, "learning_rate": 2.3167265261016437e-05, "loss": 3.0458, "step": 1710000 }, { "epoch": 10.74, "eval_accuracy": 0.45366343668773657, "eval_loss": 2.9618566036224365, "eval_runtime": 1448.0946, "eval_samples_per_second": 57.87, "eval_steps_per_second": 5.788, "step": 1710000 }, { "epoch": 10.74, "learning_rate": 2.3159416558877825e-05, "loss": 3.0466, "step": 1710500 }, { "epoch": 10.74, "learning_rate": 2.3151567856739213e-05, "loss": 3.0454, "step": 1711000 }, { "epoch": 10.75, "learning_rate": 2.3143719154600594e-05, "loss": 3.0433, "step": 1711500 }, { "epoch": 10.75, "learning_rate": 2.3135870452461983e-05, "loss": 3.0413, "step": 1712000 }, { "epoch": 10.75, "learning_rate": 2.3128021750323367e-05, "loss": 3.0442, "step": 1712500 }, { "epoch": 10.76, "learning_rate": 2.3120173048184755e-05, "loss": 3.0478, "step": 1713000 }, { "epoch": 10.76, "learning_rate": 2.3112324346046137e-05, "loss": 3.0505, "step": 1713500 }, { "epoch": 10.76, "learning_rate": 2.3104475643907525e-05, "loss": 3.042, "step": 1714000 }, { "epoch": 10.77, "learning_rate": 2.309662694176891e-05, "loss": 3.0428, "step": 1714500 }, { "epoch": 10.77, "learning_rate": 2.3088778239630294e-05, "loss": 3.0443, "step": 1715000 }, { "epoch": 10.77, "learning_rate": 2.3080960932300236e-05, "loss": 3.0404, "step": 1715500 }, { "epoch": 10.77, "learning_rate": 2.3073112230161624e-05, "loss": 3.0484, "step": 1716000 }, { "epoch": 10.78, "learning_rate": 2.3065263528023005e-05, "loss": 3.0325, "step": 1716500 }, { "epoch": 10.78, "learning_rate": 2.305743052328867e-05, "loss": 3.0388, "step": 1717000 }, { "epoch": 10.78, "learning_rate": 2.3049581821150058e-05, "loss": 3.0469, "step": 1717500 }, { "epoch": 10.79, "learning_rate": 2.304173311901144e-05, "loss": 3.041, "step": 1718000 }, { "epoch": 10.79, "learning_rate": 2.3033884416872828e-05, "loss": 3.0388, "step": 1718500 }, { "epoch": 10.79, "learning_rate": 2.3026035714734212e-05, "loss": 3.0448, "step": 1719000 }, { "epoch": 10.8, "learning_rate": 2.30181870125956e-05, "loss": 3.0391, "step": 1719500 }, { "epoch": 10.8, "learning_rate": 2.3010338310456982e-05, "loss": 3.0372, "step": 1720000 }, { "epoch": 10.8, "learning_rate": 2.300248960831837e-05, "loss": 3.0397, "step": 1720500 }, { "epoch": 10.81, "learning_rate": 2.2994640906179755e-05, "loss": 3.0411, "step": 1721000 }, { "epoch": 10.81, "learning_rate": 2.298679220404114e-05, "loss": 3.0417, "step": 1721500 }, { "epoch": 10.81, "learning_rate": 2.2978943501902528e-05, "loss": 3.0432, "step": 1722000 }, { "epoch": 10.82, "learning_rate": 2.2971094799763912e-05, "loss": 3.0416, "step": 1722500 }, { "epoch": 10.82, "learning_rate": 2.2963261795029574e-05, "loss": 3.0427, "step": 1723000 }, { "epoch": 10.82, "learning_rate": 2.295541309289096e-05, "loss": 3.0405, "step": 1723500 }, { "epoch": 10.82, "learning_rate": 2.2947580088156623e-05, "loss": 3.0406, "step": 1724000 }, { "epoch": 10.83, "learning_rate": 2.2939731386018008e-05, "loss": 3.0385, "step": 1724500 }, { "epoch": 10.83, "learning_rate": 2.2931882683879393e-05, "loss": 3.04, "step": 1725000 }, { "epoch": 10.83, "learning_rate": 2.292403398174078e-05, "loss": 3.0419, "step": 1725500 }, { "epoch": 10.84, "learning_rate": 2.2916185279602166e-05, "loss": 3.0499, "step": 1726000 }, { "epoch": 10.84, "learning_rate": 2.290833657746355e-05, "loss": 3.0387, "step": 1726500 }, { "epoch": 10.84, "learning_rate": 2.290048787532494e-05, "loss": 3.0485, "step": 1727000 }, { "epoch": 10.85, "learning_rate": 2.28926548705906e-05, "loss": 3.0401, "step": 1727500 }, { "epoch": 10.85, "learning_rate": 2.2884806168451985e-05, "loss": 3.0414, "step": 1728000 }, { "epoch": 10.85, "learning_rate": 2.2876957466313373e-05, "loss": 3.0455, "step": 1728500 }, { "epoch": 10.86, "learning_rate": 2.2869108764174757e-05, "loss": 3.0384, "step": 1729000 }, { "epoch": 10.86, "learning_rate": 2.2861260062036146e-05, "loss": 3.0381, "step": 1729500 }, { "epoch": 10.86, "learning_rate": 2.2853411359897527e-05, "loss": 3.0417, "step": 1730000 }, { "epoch": 10.87, "learning_rate": 2.2845562657758915e-05, "loss": 3.0401, "step": 1730500 }, { "epoch": 10.87, "learning_rate": 2.28377139556203e-05, "loss": 3.0456, "step": 1731000 }, { "epoch": 10.87, "learning_rate": 2.282988095088596e-05, "loss": 3.0457, "step": 1731500 }, { "epoch": 10.88, "learning_rate": 2.282203224874735e-05, "loss": 3.0402, "step": 1732000 }, { "epoch": 10.88, "learning_rate": 2.2814183546608734e-05, "loss": 3.0389, "step": 1732500 }, { "epoch": 10.88, "learning_rate": 2.2806350541874395e-05, "loss": 3.0359, "step": 1733000 }, { "epoch": 10.88, "learning_rate": 2.2798501839735784e-05, "loss": 3.0417, "step": 1733500 }, { "epoch": 10.89, "learning_rate": 2.2790653137597168e-05, "loss": 3.0412, "step": 1734000 }, { "epoch": 10.89, "learning_rate": 2.2782804435458553e-05, "loss": 3.0431, "step": 1734500 }, { "epoch": 10.89, "learning_rate": 2.2774971430724218e-05, "loss": 3.0395, "step": 1735000 }, { "epoch": 10.9, "learning_rate": 2.2767122728585603e-05, "loss": 3.0516, "step": 1735500 }, { "epoch": 10.9, "learning_rate": 2.2759274026446987e-05, "loss": 3.046, "step": 1736000 }, { "epoch": 10.9, "learning_rate": 2.2751425324308372e-05, "loss": 3.0387, "step": 1736500 }, { "epoch": 10.91, "learning_rate": 2.274357662216976e-05, "loss": 3.0359, "step": 1737000 }, { "epoch": 10.91, "learning_rate": 2.2735743617435425e-05, "loss": 3.0422, "step": 1737500 }, { "epoch": 10.91, "learning_rate": 2.2727910612701083e-05, "loss": 3.0486, "step": 1738000 }, { "epoch": 10.92, "learning_rate": 2.272006191056247e-05, "loss": 3.0396, "step": 1738500 }, { "epoch": 10.92, "learning_rate": 2.2712213208423856e-05, "loss": 3.0435, "step": 1739000 }, { "epoch": 10.92, "learning_rate": 2.270436450628524e-05, "loss": 3.0429, "step": 1739500 }, { "epoch": 10.93, "learning_rate": 2.269651580414663e-05, "loss": 3.0414, "step": 1740000 }, { "epoch": 10.93, "eval_accuracy": 0.4540145571090232, "eval_loss": 2.959815263748169, "eval_runtime": 1452.1557, "eval_samples_per_second": 57.708, "eval_steps_per_second": 5.771, "step": 1740000 }, { "epoch": 10.93, "learning_rate": 2.2688667102008013e-05, "loss": 3.0427, "step": 1740500 }, { "epoch": 10.93, "learning_rate": 2.2680818399869398e-05, "loss": 3.0403, "step": 1741000 }, { "epoch": 10.93, "learning_rate": 2.2672969697730783e-05, "loss": 3.0411, "step": 1741500 }, { "epoch": 10.94, "learning_rate": 2.266512099559217e-05, "loss": 3.046, "step": 1742000 }, { "epoch": 10.94, "learning_rate": 2.2657287990857832e-05, "loss": 3.0385, "step": 1742500 }, { "epoch": 10.94, "learning_rate": 2.2649439288719217e-05, "loss": 3.0382, "step": 1743000 }, { "epoch": 10.95, "learning_rate": 2.2641590586580605e-05, "loss": 3.0422, "step": 1743500 }, { "epoch": 10.95, "learning_rate": 2.263374188444199e-05, "loss": 3.0467, "step": 1744000 }, { "epoch": 10.95, "learning_rate": 2.2625924577111928e-05, "loss": 3.0462, "step": 1744500 }, { "epoch": 10.96, "learning_rate": 2.2618075874973316e-05, "loss": 3.0425, "step": 1745000 }, { "epoch": 10.96, "learning_rate": 2.26102271728347e-05, "loss": 3.0416, "step": 1745500 }, { "epoch": 10.96, "learning_rate": 2.2602378470696086e-05, "loss": 3.0411, "step": 1746000 }, { "epoch": 10.97, "learning_rate": 2.2594529768557474e-05, "loss": 3.0414, "step": 1746500 }, { "epoch": 10.97, "learning_rate": 2.258668106641886e-05, "loss": 3.0439, "step": 1747000 }, { "epoch": 10.97, "learning_rate": 2.257884806168452e-05, "loss": 3.0401, "step": 1747500 }, { "epoch": 10.98, "learning_rate": 2.2570999359545908e-05, "loss": 3.0375, "step": 1748000 }, { "epoch": 10.98, "learning_rate": 2.2563150657407293e-05, "loss": 3.0402, "step": 1748500 }, { "epoch": 10.98, "learning_rate": 2.2555301955268677e-05, "loss": 3.042, "step": 1749000 }, { "epoch": 10.99, "learning_rate": 2.2547453253130062e-05, "loss": 3.038, "step": 1749500 }, { "epoch": 10.99, "learning_rate": 2.253960455099145e-05, "loss": 3.0401, "step": 1750000 }, { "epoch": 10.99, "learning_rate": 2.2531755848852835e-05, "loss": 3.0474, "step": 1750500 }, { "epoch": 10.99, "learning_rate": 2.252390714671422e-05, "loss": 3.0359, "step": 1751000 }, { "epoch": 11.0, "learning_rate": 2.2516058444575605e-05, "loss": 3.0396, "step": 1751500 }, { "epoch": 11.0, "learning_rate": 2.2508209742436993e-05, "loss": 3.0445, "step": 1752000 }, { "epoch": 11.0, "learning_rate": 2.2500361040298377e-05, "loss": 3.021, "step": 1752500 }, { "epoch": 11.01, "learning_rate": 2.2492512338159762e-05, "loss": 3.0228, "step": 1753000 }, { "epoch": 11.01, "learning_rate": 2.2484679333425427e-05, "loss": 3.019, "step": 1753500 }, { "epoch": 11.01, "learning_rate": 2.247683063128681e-05, "loss": 3.0213, "step": 1754000 }, { "epoch": 11.02, "learning_rate": 2.2468981929148196e-05, "loss": 3.0157, "step": 1754500 }, { "epoch": 11.02, "learning_rate": 2.246113322700958e-05, "loss": 3.0318, "step": 1755000 }, { "epoch": 11.02, "learning_rate": 2.2453315919679523e-05, "loss": 3.0268, "step": 1755500 }, { "epoch": 11.03, "learning_rate": 2.2445467217540907e-05, "loss": 3.0166, "step": 1756000 }, { "epoch": 11.03, "learning_rate": 2.2437618515402295e-05, "loss": 3.0267, "step": 1756500 }, { "epoch": 11.03, "learning_rate": 2.242976981326368e-05, "loss": 3.0258, "step": 1757000 }, { "epoch": 11.04, "learning_rate": 2.2421921111125065e-05, "loss": 3.0225, "step": 1757500 }, { "epoch": 11.04, "learning_rate": 2.241407240898645e-05, "loss": 3.0231, "step": 1758000 }, { "epoch": 11.04, "learning_rate": 2.2406223706847838e-05, "loss": 3.0256, "step": 1758500 }, { "epoch": 11.04, "learning_rate": 2.2398375004709222e-05, "loss": 3.0244, "step": 1759000 }, { "epoch": 11.05, "learning_rate": 2.2390541999974884e-05, "loss": 3.026, "step": 1759500 }, { "epoch": 11.05, "learning_rate": 2.238270899524055e-05, "loss": 3.0234, "step": 1760000 }, { "epoch": 11.05, "learning_rate": 2.2374860293101933e-05, "loss": 3.0297, "step": 1760500 }, { "epoch": 11.06, "learning_rate": 2.2367011590963318e-05, "loss": 3.0256, "step": 1761000 }, { "epoch": 11.06, "learning_rate": 2.2359178586228983e-05, "loss": 3.019, "step": 1761500 }, { "epoch": 11.06, "learning_rate": 2.2351329884090368e-05, "loss": 3.0333, "step": 1762000 }, { "epoch": 11.07, "learning_rate": 2.2343481181951752e-05, "loss": 3.0254, "step": 1762500 }, { "epoch": 11.07, "learning_rate": 2.233563247981314e-05, "loss": 3.0335, "step": 1763000 }, { "epoch": 11.07, "learning_rate": 2.2327783777674525e-05, "loss": 3.0278, "step": 1763500 }, { "epoch": 11.08, "learning_rate": 2.231993507553591e-05, "loss": 3.0241, "step": 1764000 }, { "epoch": 11.08, "learning_rate": 2.2312086373397295e-05, "loss": 3.0285, "step": 1764500 }, { "epoch": 11.08, "learning_rate": 2.2304237671258683e-05, "loss": 3.0316, "step": 1765000 }, { "epoch": 11.09, "learning_rate": 2.2296388969120068e-05, "loss": 3.0306, "step": 1765500 }, { "epoch": 11.09, "learning_rate": 2.228855596438573e-05, "loss": 3.0268, "step": 1766000 }, { "epoch": 11.09, "learning_rate": 2.2280707262247117e-05, "loss": 3.0253, "step": 1766500 }, { "epoch": 11.09, "learning_rate": 2.2272858560108502e-05, "loss": 3.0283, "step": 1767000 }, { "epoch": 11.1, "learning_rate": 2.2265009857969887e-05, "loss": 3.0286, "step": 1767500 }, { "epoch": 11.1, "learning_rate": 2.225716115583127e-05, "loss": 3.0257, "step": 1768000 }, { "epoch": 11.1, "learning_rate": 2.224931245369266e-05, "loss": 3.0305, "step": 1768500 }, { "epoch": 11.11, "learning_rate": 2.224147944895832e-05, "loss": 3.0259, "step": 1769000 }, { "epoch": 11.11, "learning_rate": 2.2233630746819706e-05, "loss": 3.0199, "step": 1769500 }, { "epoch": 11.11, "learning_rate": 2.2225782044681094e-05, "loss": 3.0329, "step": 1770000 }, { "epoch": 11.11, "eval_accuracy": 0.4542067570751885, "eval_loss": 2.9596712589263916, "eval_runtime": 1449.9363, "eval_samples_per_second": 57.796, "eval_steps_per_second": 5.78, "step": 1770000 }, { "epoch": 11.12, "learning_rate": 2.221793334254248e-05, "loss": 3.0241, "step": 1770500 }, { "epoch": 11.12, "learning_rate": 2.2210084640403863e-05, "loss": 3.0208, "step": 1771000 }, { "epoch": 11.12, "learning_rate": 2.2202251635669528e-05, "loss": 3.0224, "step": 1771500 }, { "epoch": 11.13, "learning_rate": 2.2194402933530913e-05, "loss": 3.0319, "step": 1772000 }, { "epoch": 11.13, "learning_rate": 2.2186554231392297e-05, "loss": 3.0264, "step": 1772500 }, { "epoch": 11.13, "learning_rate": 2.2178705529253686e-05, "loss": 3.0286, "step": 1773000 }, { "epoch": 11.14, "learning_rate": 2.217085682711507e-05, "loss": 3.0302, "step": 1773500 }, { "epoch": 11.14, "learning_rate": 2.2163008124976455e-05, "loss": 3.0303, "step": 1774000 }, { "epoch": 11.14, "learning_rate": 2.2155175120242116e-05, "loss": 3.0299, "step": 1774500 }, { "epoch": 11.15, "learning_rate": 2.2147326418103504e-05, "loss": 3.0258, "step": 1775000 }, { "epoch": 11.15, "learning_rate": 2.213947771596489e-05, "loss": 3.0349, "step": 1775500 }, { "epoch": 11.15, "learning_rate": 2.2131629013826274e-05, "loss": 3.0261, "step": 1776000 }, { "epoch": 11.15, "learning_rate": 2.2123780311687662e-05, "loss": 3.0287, "step": 1776500 }, { "epoch": 11.16, "learning_rate": 2.2115947306953323e-05, "loss": 3.0309, "step": 1777000 }, { "epoch": 11.16, "learning_rate": 2.2108098604814708e-05, "loss": 3.0276, "step": 1777500 }, { "epoch": 11.16, "learning_rate": 2.2100249902676096e-05, "loss": 3.0258, "step": 1778000 }, { "epoch": 11.17, "learning_rate": 2.209240120053748e-05, "loss": 3.0291, "step": 1778500 }, { "epoch": 11.17, "learning_rate": 2.2084552498398866e-05, "loss": 3.0304, "step": 1779000 }, { "epoch": 11.17, "learning_rate": 2.207670379626025e-05, "loss": 3.029, "step": 1779500 }, { "epoch": 11.18, "learning_rate": 2.206885509412164e-05, "loss": 3.0274, "step": 1780000 }, { "epoch": 11.18, "learning_rate": 2.2061006391983023e-05, "loss": 3.0241, "step": 1780500 }, { "epoch": 11.18, "learning_rate": 2.2053173387248685e-05, "loss": 3.0275, "step": 1781000 }, { "epoch": 11.19, "learning_rate": 2.2045324685110073e-05, "loss": 3.0333, "step": 1781500 }, { "epoch": 11.19, "learning_rate": 2.2037475982971458e-05, "loss": 3.0284, "step": 1782000 }, { "epoch": 11.19, "learning_rate": 2.2029627280832842e-05, "loss": 3.0318, "step": 1782500 }, { "epoch": 11.2, "learning_rate": 2.2021778578694227e-05, "loss": 3.0324, "step": 1783000 }, { "epoch": 11.2, "learning_rate": 2.2013929876555615e-05, "loss": 3.0341, "step": 1783500 }, { "epoch": 11.2, "learning_rate": 2.2006081174416997e-05, "loss": 3.0251, "step": 1784000 }, { "epoch": 11.2, "learning_rate": 2.1998232472278385e-05, "loss": 3.0298, "step": 1784500 }, { "epoch": 11.21, "learning_rate": 2.199039946754405e-05, "loss": 3.0368, "step": 1785000 }, { "epoch": 11.21, "learning_rate": 2.198255076540543e-05, "loss": 3.0326, "step": 1785500 }, { "epoch": 11.21, "learning_rate": 2.1974717760671096e-05, "loss": 3.0422, "step": 1786000 }, { "epoch": 11.22, "learning_rate": 2.1966869058532484e-05, "loss": 3.0331, "step": 1786500 }, { "epoch": 11.22, "learning_rate": 2.1959020356393865e-05, "loss": 3.0317, "step": 1787000 }, { "epoch": 11.22, "learning_rate": 2.1951171654255253e-05, "loss": 3.0303, "step": 1787500 }, { "epoch": 11.23, "learning_rate": 2.1943322952116638e-05, "loss": 3.0322, "step": 1788000 }, { "epoch": 11.23, "learning_rate": 2.1935474249978026e-05, "loss": 3.0358, "step": 1788500 }, { "epoch": 11.23, "learning_rate": 2.192762554783941e-05, "loss": 3.027, "step": 1789000 }, { "epoch": 11.24, "learning_rate": 2.1919776845700796e-05, "loss": 3.0289, "step": 1789500 }, { "epoch": 11.24, "learning_rate": 2.191194384096646e-05, "loss": 3.0287, "step": 1790000 }, { "epoch": 11.24, "learning_rate": 2.1904095138827842e-05, "loss": 3.0292, "step": 1790500 }, { "epoch": 11.25, "learning_rate": 2.189624643668923e-05, "loss": 3.0319, "step": 1791000 }, { "epoch": 11.25, "learning_rate": 2.188842912935917e-05, "loss": 3.0332, "step": 1791500 }, { "epoch": 11.25, "learning_rate": 2.1880580427220556e-05, "loss": 3.0229, "step": 1792000 }, { "epoch": 11.26, "learning_rate": 2.187274742248622e-05, "loss": 3.0349, "step": 1792500 }, { "epoch": 11.26, "learning_rate": 2.1864898720347605e-05, "loss": 3.0312, "step": 1793000 }, { "epoch": 11.26, "learning_rate": 2.185705001820899e-05, "loss": 3.0319, "step": 1793500 }, { "epoch": 11.26, "learning_rate": 2.1849201316070375e-05, "loss": 3.0323, "step": 1794000 }, { "epoch": 11.27, "learning_rate": 2.1841352613931763e-05, "loss": 3.025, "step": 1794500 }, { "epoch": 11.27, "learning_rate": 2.1833503911793148e-05, "loss": 3.0335, "step": 1795000 }, { "epoch": 11.27, "learning_rate": 2.1825655209654533e-05, "loss": 3.0349, "step": 1795500 }, { "epoch": 11.28, "learning_rate": 2.1817806507515917e-05, "loss": 3.028, "step": 1796000 }, { "epoch": 11.28, "learning_rate": 2.1809957805377305e-05, "loss": 3.0251, "step": 1796500 }, { "epoch": 11.28, "learning_rate": 2.1802109103238687e-05, "loss": 3.0311, "step": 1797000 }, { "epoch": 11.29, "learning_rate": 2.1794260401100075e-05, "loss": 3.032, "step": 1797500 }, { "epoch": 11.29, "learning_rate": 2.1786411698961463e-05, "loss": 3.0345, "step": 1798000 }, { "epoch": 11.29, "learning_rate": 2.177857869422712e-05, "loss": 3.0248, "step": 1798500 }, { "epoch": 11.3, "learning_rate": 2.177072999208851e-05, "loss": 3.0301, "step": 1799000 }, { "epoch": 11.3, "learning_rate": 2.1762881289949897e-05, "loss": 3.0348, "step": 1799500 }, { "epoch": 11.3, "learning_rate": 2.1755032587811282e-05, "loss": 3.0232, "step": 1800000 }, { "epoch": 11.3, "eval_accuracy": 0.45444698078722384, "eval_loss": 2.9568142890930176, "eval_runtime": 1450.3833, "eval_samples_per_second": 57.779, "eval_steps_per_second": 5.778, "step": 1800000 }, { "epoch": 11.31, "learning_rate": 2.1747183885672667e-05, "loss": 3.0291, "step": 1800500 }, { "epoch": 11.31, "learning_rate": 2.1739350880938328e-05, "loss": 3.0322, "step": 1801000 }, { "epoch": 11.31, "learning_rate": 2.1731502178799716e-05, "loss": 3.0288, "step": 1801500 }, { "epoch": 11.31, "learning_rate": 2.17236534766611e-05, "loss": 3.031, "step": 1802000 }, { "epoch": 11.32, "learning_rate": 2.1715804774522486e-05, "loss": 3.0337, "step": 1802500 }, { "epoch": 11.32, "learning_rate": 2.1707956072383874e-05, "loss": 3.0279, "step": 1803000 }, { "epoch": 11.32, "learning_rate": 2.1700107370245255e-05, "loss": 3.0348, "step": 1803500 }, { "epoch": 11.33, "learning_rate": 2.1692258668106643e-05, "loss": 3.0399, "step": 1804000 }, { "epoch": 11.33, "learning_rate": 2.1684409965968028e-05, "loss": 3.0339, "step": 1804500 }, { "epoch": 11.33, "learning_rate": 2.167657696123369e-05, "loss": 3.028, "step": 1805000 }, { "epoch": 11.34, "learning_rate": 2.1668728259095078e-05, "loss": 3.034, "step": 1805500 }, { "epoch": 11.34, "learning_rate": 2.1660879556956462e-05, "loss": 3.0336, "step": 1806000 }, { "epoch": 11.34, "learning_rate": 2.165303085481785e-05, "loss": 3.0296, "step": 1806500 }, { "epoch": 11.35, "learning_rate": 2.1645197850083512e-05, "loss": 3.0356, "step": 1807000 }, { "epoch": 11.35, "learning_rate": 2.1637349147944897e-05, "loss": 3.0298, "step": 1807500 }, { "epoch": 11.35, "learning_rate": 2.1629500445806285e-05, "loss": 3.0309, "step": 1808000 }, { "epoch": 11.36, "learning_rate": 2.1621651743667666e-05, "loss": 3.0289, "step": 1808500 }, { "epoch": 11.36, "learning_rate": 2.1613803041529054e-05, "loss": 3.0358, "step": 1809000 }, { "epoch": 11.36, "learning_rate": 2.1605985734198996e-05, "loss": 3.0343, "step": 1809500 }, { "epoch": 11.36, "learning_rate": 2.1598152729464657e-05, "loss": 3.0316, "step": 1810000 }, { "epoch": 11.37, "learning_rate": 2.1590304027326042e-05, "loss": 3.0387, "step": 1810500 }, { "epoch": 11.37, "learning_rate": 2.158245532518743e-05, "loss": 3.0325, "step": 1811000 }, { "epoch": 11.37, "learning_rate": 2.157460662304881e-05, "loss": 3.0368, "step": 1811500 }, { "epoch": 11.38, "learning_rate": 2.15667579209102e-05, "loss": 3.0341, "step": 1812000 }, { "epoch": 11.38, "learning_rate": 2.1558909218771587e-05, "loss": 3.0338, "step": 1812500 }, { "epoch": 11.38, "learning_rate": 2.155106051663297e-05, "loss": 3.039, "step": 1813000 }, { "epoch": 11.39, "learning_rate": 2.1543211814494357e-05, "loss": 3.032, "step": 1813500 }, { "epoch": 11.39, "learning_rate": 2.1535378809760018e-05, "loss": 3.038, "step": 1814000 }, { "epoch": 11.39, "learning_rate": 2.1527530107621406e-05, "loss": 3.0371, "step": 1814500 }, { "epoch": 11.4, "learning_rate": 2.151968140548279e-05, "loss": 3.0395, "step": 1815000 }, { "epoch": 11.4, "learning_rate": 2.1511832703344176e-05, "loss": 3.0228, "step": 1815500 }, { "epoch": 11.4, "learning_rate": 2.1503984001205564e-05, "loss": 3.0342, "step": 1816000 }, { "epoch": 11.41, "learning_rate": 2.1496135299066945e-05, "loss": 3.0368, "step": 1816500 }, { "epoch": 11.41, "learning_rate": 2.1488286596928334e-05, "loss": 3.0339, "step": 1817000 }, { "epoch": 11.41, "learning_rate": 2.1480437894789718e-05, "loss": 3.0357, "step": 1817500 }, { "epoch": 11.42, "learning_rate": 2.147260489005538e-05, "loss": 3.0343, "step": 1818000 }, { "epoch": 11.42, "learning_rate": 2.1464756187916768e-05, "loss": 3.0274, "step": 1818500 }, { "epoch": 11.42, "learning_rate": 2.1456907485778153e-05, "loss": 3.0352, "step": 1819000 }, { "epoch": 11.42, "learning_rate": 2.1449074481043814e-05, "loss": 3.0292, "step": 1819500 }, { "epoch": 11.43, "learning_rate": 2.1441225778905202e-05, "loss": 3.0346, "step": 1820000 }, { "epoch": 11.43, "learning_rate": 2.1433377076766587e-05, "loss": 3.0286, "step": 1820500 }, { "epoch": 11.43, "learning_rate": 2.1425528374627975e-05, "loss": 3.0293, "step": 1821000 }, { "epoch": 11.44, "learning_rate": 2.1417695369893636e-05, "loss": 3.0238, "step": 1821500 }, { "epoch": 11.44, "learning_rate": 2.140984666775502e-05, "loss": 3.0339, "step": 1822000 }, { "epoch": 11.44, "learning_rate": 2.140199796561641e-05, "loss": 3.0283, "step": 1822500 }, { "epoch": 11.45, "learning_rate": 2.139414926347779e-05, "loss": 3.0292, "step": 1823000 }, { "epoch": 11.45, "learning_rate": 2.138630056133918e-05, "loss": 3.0329, "step": 1823500 }, { "epoch": 11.45, "learning_rate": 2.1378451859200563e-05, "loss": 3.0291, "step": 1824000 }, { "epoch": 11.46, "learning_rate": 2.1370603157061948e-05, "loss": 3.0368, "step": 1824500 }, { "epoch": 11.46, "learning_rate": 2.1362754454923333e-05, "loss": 3.0321, "step": 1825000 }, { "epoch": 11.46, "learning_rate": 2.135490575278472e-05, "loss": 3.0365, "step": 1825500 }, { "epoch": 11.47, "learning_rate": 2.1347072748050386e-05, "loss": 3.031, "step": 1826000 }, { "epoch": 11.47, "learning_rate": 2.1339239743316047e-05, "loss": 3.0334, "step": 1826500 }, { "epoch": 11.47, "learning_rate": 2.1331391041177432e-05, "loss": 3.0331, "step": 1827000 }, { "epoch": 11.47, "learning_rate": 2.132354233903882e-05, "loss": 3.0305, "step": 1827500 }, { "epoch": 11.48, "learning_rate": 2.131570933430448e-05, "loss": 3.0289, "step": 1828000 }, { "epoch": 11.48, "learning_rate": 2.1307860632165866e-05, "loss": 3.029, "step": 1828500 }, { "epoch": 11.48, "learning_rate": 2.1300011930027254e-05, "loss": 3.0273, "step": 1829000 }, { "epoch": 11.49, "learning_rate": 2.1292163227888636e-05, "loss": 3.0323, "step": 1829500 }, { "epoch": 11.49, "learning_rate": 2.1284314525750024e-05, "loss": 3.0281, "step": 1830000 }, { "epoch": 11.49, "eval_accuracy": 0.4547652999519191, "eval_loss": 2.9545164108276367, "eval_runtime": 1449.5474, "eval_samples_per_second": 57.812, "eval_steps_per_second": 5.782, "step": 1830000 }, { "epoch": 11.49, "learning_rate": 2.127646582361141e-05, "loss": 3.0306, "step": 1830500 }, { "epoch": 11.5, "learning_rate": 2.1268617121472793e-05, "loss": 3.0308, "step": 1831000 }, { "epoch": 11.5, "learning_rate": 2.1260768419334178e-05, "loss": 3.039, "step": 1831500 }, { "epoch": 11.5, "learning_rate": 2.1252919717195566e-05, "loss": 3.0323, "step": 1832000 }, { "epoch": 11.51, "learning_rate": 2.124507101505695e-05, "loss": 3.0308, "step": 1832500 }, { "epoch": 11.51, "learning_rate": 2.1237222312918336e-05, "loss": 3.0282, "step": 1833000 }, { "epoch": 11.51, "learning_rate": 2.1229373610779724e-05, "loss": 3.0334, "step": 1833500 }, { "epoch": 11.52, "learning_rate": 2.122152490864111e-05, "loss": 3.0235, "step": 1834000 }, { "epoch": 11.52, "learning_rate": 2.121369190390677e-05, "loss": 3.0349, "step": 1834500 }, { "epoch": 11.52, "learning_rate": 2.1205843201768158e-05, "loss": 3.0323, "step": 1835000 }, { "epoch": 11.53, "learning_rate": 2.119801019703382e-05, "loss": 3.0352, "step": 1835500 }, { "epoch": 11.53, "learning_rate": 2.1190161494895204e-05, "loss": 3.026, "step": 1836000 }, { "epoch": 11.53, "learning_rate": 2.118231279275659e-05, "loss": 3.03, "step": 1836500 }, { "epoch": 11.53, "learning_rate": 2.1174464090617977e-05, "loss": 3.0354, "step": 1837000 }, { "epoch": 11.54, "learning_rate": 2.1166615388479365e-05, "loss": 3.0235, "step": 1837500 }, { "epoch": 11.54, "learning_rate": 2.1158782383745023e-05, "loss": 3.0364, "step": 1838000 }, { "epoch": 11.54, "learning_rate": 2.115093368160641e-05, "loss": 3.0357, "step": 1838500 }, { "epoch": 11.55, "learning_rate": 2.1143084979467796e-05, "loss": 3.038, "step": 1839000 }, { "epoch": 11.55, "learning_rate": 2.113523627732918e-05, "loss": 3.0286, "step": 1839500 }, { "epoch": 11.55, "learning_rate": 2.112738757519057e-05, "loss": 3.0313, "step": 1840000 }, { "epoch": 11.56, "learning_rate": 2.1119538873051953e-05, "loss": 3.0269, "step": 1840500 }, { "epoch": 11.56, "learning_rate": 2.1111690170913338e-05, "loss": 3.0332, "step": 1841000 }, { "epoch": 11.56, "learning_rate": 2.1103841468774723e-05, "loss": 3.0311, "step": 1841500 }, { "epoch": 11.57, "learning_rate": 2.1096008464040388e-05, "loss": 3.0332, "step": 1842000 }, { "epoch": 11.57, "learning_rate": 2.1088159761901772e-05, "loss": 3.0311, "step": 1842500 }, { "epoch": 11.57, "learning_rate": 2.1080311059763157e-05, "loss": 3.0292, "step": 1843000 }, { "epoch": 11.58, "learning_rate": 2.1072462357624545e-05, "loss": 3.0313, "step": 1843500 }, { "epoch": 11.58, "learning_rate": 2.106461365548593e-05, "loss": 3.0336, "step": 1844000 }, { "epoch": 11.58, "learning_rate": 2.1056764953347315e-05, "loss": 3.0292, "step": 1844500 }, { "epoch": 11.58, "learning_rate": 2.10489162512087e-05, "loss": 3.0328, "step": 1845000 }, { "epoch": 11.59, "learning_rate": 2.1041067549070088e-05, "loss": 3.0305, "step": 1845500 }, { "epoch": 11.59, "learning_rate": 2.1033218846931472e-05, "loss": 3.0337, "step": 1846000 }, { "epoch": 11.59, "learning_rate": 2.1025385842197134e-05, "loss": 3.0414, "step": 1846500 }, { "epoch": 11.6, "learning_rate": 2.1017537140058522e-05, "loss": 3.0312, "step": 1847000 }, { "epoch": 11.6, "learning_rate": 2.1009688437919907e-05, "loss": 3.0224, "step": 1847500 }, { "epoch": 11.6, "learning_rate": 2.100183973578129e-05, "loss": 3.029, "step": 1848000 }, { "epoch": 11.61, "learning_rate": 2.0993991033642676e-05, "loss": 3.0311, "step": 1848500 }, { "epoch": 11.61, "learning_rate": 2.0986142331504064e-05, "loss": 3.0339, "step": 1849000 }, { "epoch": 11.61, "learning_rate": 2.097829362936545e-05, "loss": 3.0263, "step": 1849500 }, { "epoch": 11.62, "learning_rate": 2.097046062463111e-05, "loss": 3.0266, "step": 1850000 }, { "epoch": 11.62, "learning_rate": 2.09626119224925e-05, "loss": 3.0332, "step": 1850500 }, { "epoch": 11.62, "learning_rate": 2.0954763220353883e-05, "loss": 3.0312, "step": 1851000 }, { "epoch": 11.63, "learning_rate": 2.0946914518215268e-05, "loss": 3.0276, "step": 1851500 }, { "epoch": 11.63, "learning_rate": 2.0939081513480933e-05, "loss": 3.03, "step": 1852000 }, { "epoch": 11.63, "learning_rate": 2.0931232811342317e-05, "loss": 3.0343, "step": 1852500 }, { "epoch": 11.63, "learning_rate": 2.0923384109203702e-05, "loss": 3.0368, "step": 1853000 }, { "epoch": 11.64, "learning_rate": 2.091553540706509e-05, "loss": 3.0346, "step": 1853500 }, { "epoch": 11.64, "learning_rate": 2.0907686704926475e-05, "loss": 3.0231, "step": 1854000 }, { "epoch": 11.64, "learning_rate": 2.089983800278786e-05, "loss": 3.0332, "step": 1854500 }, { "epoch": 11.65, "learning_rate": 2.0891989300649245e-05, "loss": 3.0303, "step": 1855000 }, { "epoch": 11.65, "learning_rate": 2.0884140598510633e-05, "loss": 3.0335, "step": 1855500 }, { "epoch": 11.65, "learning_rate": 2.0876307593776294e-05, "loss": 3.0289, "step": 1856000 }, { "epoch": 11.66, "learning_rate": 2.086845889163768e-05, "loss": 3.0269, "step": 1856500 }, { "epoch": 11.66, "learning_rate": 2.0860610189499067e-05, "loss": 3.0247, "step": 1857000 }, { "epoch": 11.66, "learning_rate": 2.0852761487360452e-05, "loss": 3.0315, "step": 1857500 }, { "epoch": 11.67, "learning_rate": 2.0844912785221836e-05, "loss": 3.0321, "step": 1858000 }, { "epoch": 11.67, "learning_rate": 2.083706408308322e-05, "loss": 3.0297, "step": 1858500 }, { "epoch": 11.67, "learning_rate": 2.0829231078348886e-05, "loss": 3.0373, "step": 1859000 }, { "epoch": 11.68, "learning_rate": 2.082138237621027e-05, "loss": 3.0376, "step": 1859500 }, { "epoch": 11.68, "learning_rate": 2.0813533674071655e-05, "loss": 3.0374, "step": 1860000 }, { "epoch": 11.68, "eval_accuracy": 0.45490199907211637, "eval_loss": 2.9541258811950684, "eval_runtime": 1450.6463, "eval_samples_per_second": 57.768, "eval_steps_per_second": 5.777, "step": 1860000 }, { "epoch": 11.68, "learning_rate": 2.0805684971933044e-05, "loss": 3.035, "step": 1860500 }, { "epoch": 11.69, "learning_rate": 2.0797836269794428e-05, "loss": 3.0318, "step": 1861000 }, { "epoch": 11.69, "learning_rate": 2.0789987567655813e-05, "loss": 3.0358, "step": 1861500 }, { "epoch": 11.69, "learning_rate": 2.0782138865517198e-05, "loss": 3.0321, "step": 1862000 }, { "epoch": 11.69, "learning_rate": 2.0774290163378586e-05, "loss": 3.0278, "step": 1862500 }, { "epoch": 11.7, "learning_rate": 2.0766472856048524e-05, "loss": 3.0288, "step": 1863000 }, { "epoch": 11.7, "learning_rate": 2.0758624153909912e-05, "loss": 3.0316, "step": 1863500 }, { "epoch": 11.7, "learning_rate": 2.0750775451771297e-05, "loss": 3.0427, "step": 1864000 }, { "epoch": 11.71, "learning_rate": 2.074292674963268e-05, "loss": 3.0326, "step": 1864500 }, { "epoch": 11.71, "learning_rate": 2.0735078047494066e-05, "loss": 3.0324, "step": 1865000 }, { "epoch": 11.71, "learning_rate": 2.0727229345355454e-05, "loss": 3.0328, "step": 1865500 }, { "epoch": 11.72, "learning_rate": 2.071938064321684e-05, "loss": 3.0323, "step": 1866000 }, { "epoch": 11.72, "learning_rate": 2.0711531941078224e-05, "loss": 3.0254, "step": 1866500 }, { "epoch": 11.72, "learning_rate": 2.070369893634389e-05, "loss": 3.0331, "step": 1867000 }, { "epoch": 11.73, "learning_rate": 2.0695850234205273e-05, "loss": 3.0314, "step": 1867500 }, { "epoch": 11.73, "learning_rate": 2.0688001532066658e-05, "loss": 3.0328, "step": 1868000 }, { "epoch": 11.73, "learning_rate": 2.0680152829928043e-05, "loss": 3.0358, "step": 1868500 }, { "epoch": 11.74, "learning_rate": 2.067230412778943e-05, "loss": 3.0296, "step": 1869000 }, { "epoch": 11.74, "learning_rate": 2.0664471123055092e-05, "loss": 3.0318, "step": 1869500 }, { "epoch": 11.74, "learning_rate": 2.0656622420916477e-05, "loss": 3.0287, "step": 1870000 }, { "epoch": 11.74, "learning_rate": 2.0648773718777865e-05, "loss": 3.0313, "step": 1870500 }, { "epoch": 11.75, "learning_rate": 2.0640940714043527e-05, "loss": 3.0319, "step": 1871000 }, { "epoch": 11.75, "learning_rate": 2.063309201190491e-05, "loss": 3.0323, "step": 1871500 }, { "epoch": 11.75, "learning_rate": 2.06252433097663e-05, "loss": 3.0233, "step": 1872000 }, { "epoch": 11.76, "learning_rate": 2.061739460762768e-05, "loss": 3.0359, "step": 1872500 }, { "epoch": 11.76, "learning_rate": 2.060954590548907e-05, "loss": 3.0337, "step": 1873000 }, { "epoch": 11.76, "learning_rate": 2.0601697203350457e-05, "loss": 3.0364, "step": 1873500 }, { "epoch": 11.77, "learning_rate": 2.0593848501211842e-05, "loss": 3.0374, "step": 1874000 }, { "epoch": 11.77, "learning_rate": 2.0585999799073227e-05, "loss": 3.033, "step": 1874500 }, { "epoch": 11.77, "learning_rate": 2.057815109693461e-05, "loss": 3.0414, "step": 1875000 }, { "epoch": 11.78, "learning_rate": 2.0570318092200276e-05, "loss": 3.035, "step": 1875500 }, { "epoch": 11.78, "learning_rate": 2.056246939006166e-05, "loss": 3.0313, "step": 1876000 }, { "epoch": 11.78, "learning_rate": 2.0554620687923046e-05, "loss": 3.028, "step": 1876500 }, { "epoch": 11.79, "learning_rate": 2.0546771985784434e-05, "loss": 3.0325, "step": 1877000 }, { "epoch": 11.79, "learning_rate": 2.053893898105009e-05, "loss": 3.0301, "step": 1877500 }, { "epoch": 11.79, "learning_rate": 2.053109027891148e-05, "loss": 3.0337, "step": 1878000 }, { "epoch": 11.8, "learning_rate": 2.0523241576772868e-05, "loss": 3.037, "step": 1878500 }, { "epoch": 11.8, "learning_rate": 2.0515392874634253e-05, "loss": 3.0311, "step": 1879000 }, { "epoch": 11.8, "learning_rate": 2.0507544172495637e-05, "loss": 3.033, "step": 1879500 }, { "epoch": 11.8, "learning_rate": 2.0499695470357022e-05, "loss": 3.0314, "step": 1880000 }, { "epoch": 11.81, "learning_rate": 2.049184676821841e-05, "loss": 3.0292, "step": 1880500 }, { "epoch": 11.81, "learning_rate": 2.048401376348407e-05, "loss": 3.0297, "step": 1881000 }, { "epoch": 11.81, "learning_rate": 2.0476165061345456e-05, "loss": 3.0329, "step": 1881500 }, { "epoch": 11.82, "learning_rate": 2.0468316359206845e-05, "loss": 3.0307, "step": 1882000 }, { "epoch": 11.82, "learning_rate": 2.0460467657068226e-05, "loss": 3.0319, "step": 1882500 }, { "epoch": 11.82, "learning_rate": 2.0452618954929614e-05, "loss": 3.037, "step": 1883000 }, { "epoch": 11.83, "learning_rate": 2.0444770252791e-05, "loss": 3.0273, "step": 1883500 }, { "epoch": 11.83, "learning_rate": 2.0436921550652387e-05, "loss": 3.0256, "step": 1884000 }, { "epoch": 11.83, "learning_rate": 2.0429072848513768e-05, "loss": 3.0319, "step": 1884500 }, { "epoch": 11.84, "learning_rate": 2.0421239843779433e-05, "loss": 3.036, "step": 1885000 }, { "epoch": 11.84, "learning_rate": 2.041339114164082e-05, "loss": 3.0314, "step": 1885500 }, { "epoch": 11.84, "learning_rate": 2.0405558136906482e-05, "loss": 3.0351, "step": 1886000 }, { "epoch": 11.85, "learning_rate": 2.0397709434767867e-05, "loss": 3.0381, "step": 1886500 }, { "epoch": 11.85, "learning_rate": 2.0389860732629255e-05, "loss": 3.0362, "step": 1887000 }, { "epoch": 11.85, "learning_rate": 2.0382012030490637e-05, "loss": 3.0296, "step": 1887500 }, { "epoch": 11.85, "learning_rate": 2.0374163328352025e-05, "loss": 3.0289, "step": 1888000 }, { "epoch": 11.86, "learning_rate": 2.036633032361769e-05, "loss": 3.0252, "step": 1888500 }, { "epoch": 11.86, "learning_rate": 2.035848162147907e-05, "loss": 3.0265, "step": 1889000 }, { "epoch": 11.86, "learning_rate": 2.035063291934046e-05, "loss": 3.0361, "step": 1889500 }, { "epoch": 11.87, "learning_rate": 2.0342784217201844e-05, "loss": 3.0343, "step": 1890000 }, { "epoch": 11.87, "eval_accuracy": 0.4553703268284779, "eval_loss": 2.9491374492645264, "eval_runtime": 1452.3246, "eval_samples_per_second": 57.701, "eval_steps_per_second": 5.771, "step": 1890000 }, { "epoch": 11.87, "learning_rate": 2.0334935515063232e-05, "loss": 3.0289, "step": 1890500 }, { "epoch": 11.87, "learning_rate": 2.0327102510328893e-05, "loss": 3.0243, "step": 1891000 }, { "epoch": 11.88, "learning_rate": 2.0319253808190278e-05, "loss": 3.028, "step": 1891500 }, { "epoch": 11.88, "learning_rate": 2.0311405106051666e-05, "loss": 3.0303, "step": 1892000 }, { "epoch": 11.88, "learning_rate": 2.0303556403913048e-05, "loss": 3.0285, "step": 1892500 }, { "epoch": 11.89, "learning_rate": 2.0295707701774436e-05, "loss": 3.0302, "step": 1893000 }, { "epoch": 11.89, "learning_rate": 2.028785899963582e-05, "loss": 3.0327, "step": 1893500 }, { "epoch": 11.89, "learning_rate": 2.0280010297497205e-05, "loss": 3.029, "step": 1894000 }, { "epoch": 11.9, "learning_rate": 2.0272161595358593e-05, "loss": 3.0346, "step": 1894500 }, { "epoch": 11.9, "learning_rate": 2.0264312893219978e-05, "loss": 3.0272, "step": 1895000 }, { "epoch": 11.9, "learning_rate": 2.025647988848564e-05, "loss": 3.0245, "step": 1895500 }, { "epoch": 11.9, "learning_rate": 2.0248631186347028e-05, "loss": 3.0358, "step": 1896000 }, { "epoch": 11.91, "learning_rate": 2.0240782484208412e-05, "loss": 3.0351, "step": 1896500 }, { "epoch": 11.91, "learning_rate": 2.02329337820698e-05, "loss": 3.0237, "step": 1897000 }, { "epoch": 11.91, "learning_rate": 2.0225132172144015e-05, "loss": 3.0374, "step": 1897500 }, { "epoch": 11.92, "learning_rate": 2.0217283470005403e-05, "loss": 3.0252, "step": 1898000 }, { "epoch": 11.92, "learning_rate": 2.020945046527106e-05, "loss": 3.0315, "step": 1898500 }, { "epoch": 11.92, "learning_rate": 2.020160176313245e-05, "loss": 3.0328, "step": 1899000 }, { "epoch": 11.93, "learning_rate": 2.0193753060993837e-05, "loss": 3.0315, "step": 1899500 }, { "epoch": 11.93, "learning_rate": 2.0185904358855222e-05, "loss": 3.0336, "step": 1900000 }, { "epoch": 11.93, "learning_rate": 2.0178055656716607e-05, "loss": 3.037, "step": 1900500 }, { "epoch": 11.94, "learning_rate": 2.017020695457799e-05, "loss": 3.0341, "step": 1901000 }, { "epoch": 11.94, "learning_rate": 2.016235825243938e-05, "loss": 3.0298, "step": 1901500 }, { "epoch": 11.94, "learning_rate": 2.015450955030076e-05, "loss": 3.0306, "step": 1902000 }, { "epoch": 11.95, "learning_rate": 2.014666084816215e-05, "loss": 3.0319, "step": 1902500 }, { "epoch": 11.95, "learning_rate": 2.0138827843427814e-05, "loss": 3.0273, "step": 1903000 }, { "epoch": 11.95, "learning_rate": 2.0130979141289195e-05, "loss": 3.0311, "step": 1903500 }, { "epoch": 11.96, "learning_rate": 2.0123130439150583e-05, "loss": 3.0243, "step": 1904000 }, { "epoch": 11.96, "learning_rate": 2.0115281737011968e-05, "loss": 3.0269, "step": 1904500 }, { "epoch": 11.96, "learning_rate": 2.0107433034873356e-05, "loss": 3.0363, "step": 1905000 }, { "epoch": 11.96, "learning_rate": 2.0099584332734738e-05, "loss": 3.028, "step": 1905500 }, { "epoch": 11.97, "learning_rate": 2.0091735630596126e-05, "loss": 3.0307, "step": 1906000 }, { "epoch": 11.97, "learning_rate": 2.008388692845751e-05, "loss": 3.0336, "step": 1906500 }, { "epoch": 11.97, "learning_rate": 2.0076053923723172e-05, "loss": 3.023, "step": 1907000 }, { "epoch": 11.98, "learning_rate": 2.006820522158456e-05, "loss": 3.0266, "step": 1907500 }, { "epoch": 11.98, "learning_rate": 2.0060356519445945e-05, "loss": 3.0302, "step": 1908000 }, { "epoch": 11.98, "learning_rate": 2.005250781730733e-05, "loss": 3.0332, "step": 1908500 }, { "epoch": 11.99, "learning_rate": 2.0044659115168718e-05, "loss": 3.0281, "step": 1909000 }, { "epoch": 11.99, "learning_rate": 2.0036810413030102e-05, "loss": 3.0301, "step": 1909500 }, { "epoch": 11.99, "learning_rate": 2.002896171089149e-05, "loss": 3.0363, "step": 1910000 }, { "epoch": 12.0, "learning_rate": 2.002112870615715e-05, "loss": 3.0323, "step": 1910500 }, { "epoch": 12.0, "learning_rate": 2.0013280004018537e-05, "loss": 3.0346, "step": 1911000 }, { "epoch": 12.0, "learning_rate": 2.0005431301879925e-05, "loss": 3.0154, "step": 1911500 }, { "epoch": 12.01, "learning_rate": 1.9997582599741306e-05, "loss": 3.0098, "step": 1912000 }, { "epoch": 12.01, "learning_rate": 1.9989733897602694e-05, "loss": 3.0147, "step": 1912500 }, { "epoch": 12.01, "learning_rate": 1.998188519546408e-05, "loss": 3.0169, "step": 1913000 }, { "epoch": 12.01, "learning_rate": 1.9974036493325464e-05, "loss": 3.0157, "step": 1913500 }, { "epoch": 12.02, "learning_rate": 1.996618779118685e-05, "loss": 3.0096, "step": 1914000 }, { "epoch": 12.02, "learning_rate": 1.9958354786452513e-05, "loss": 3.0157, "step": 1914500 }, { "epoch": 12.02, "learning_rate": 1.9950506084313898e-05, "loss": 3.019, "step": 1915000 }, { "epoch": 12.03, "learning_rate": 1.9942657382175283e-05, "loss": 3.0102, "step": 1915500 }, { "epoch": 12.03, "learning_rate": 1.993480868003667e-05, "loss": 3.0092, "step": 1916000 }, { "epoch": 12.03, "learning_rate": 1.9926959977898056e-05, "loss": 3.0128, "step": 1916500 }, { "epoch": 12.04, "learning_rate": 1.9919126973163717e-05, "loss": 3.0192, "step": 1917000 }, { "epoch": 12.04, "learning_rate": 1.9911278271025105e-05, "loss": 3.0227, "step": 1917500 }, { "epoch": 12.04, "learning_rate": 1.990342956888649e-05, "loss": 3.0124, "step": 1918000 }, { "epoch": 12.05, "learning_rate": 1.989559656415215e-05, "loss": 3.0147, "step": 1918500 }, { "epoch": 12.05, "learning_rate": 1.988774786201354e-05, "loss": 3.0186, "step": 1919000 }, { "epoch": 12.05, "learning_rate": 1.9879899159874924e-05, "loss": 3.0106, "step": 1919500 }, { "epoch": 12.06, "learning_rate": 1.9872066155140585e-05, "loss": 3.0153, "step": 1920000 }, { "epoch": 12.06, "eval_accuracy": 0.4555918636226401, "eval_loss": 2.949010133743286, "eval_runtime": 1450.6884, "eval_samples_per_second": 57.766, "eval_steps_per_second": 5.777, "step": 1920000 }, { "epoch": 12.06, "learning_rate": 1.9864217453001974e-05, "loss": 3.0146, "step": 1920500 }, { "epoch": 12.06, "learning_rate": 1.985636875086336e-05, "loss": 3.0184, "step": 1921000 }, { "epoch": 12.07, "learning_rate": 1.9848520048724743e-05, "loss": 3.0202, "step": 1921500 }, { "epoch": 12.07, "learning_rate": 1.9840671346586128e-05, "loss": 3.0161, "step": 1922000 }, { "epoch": 12.07, "learning_rate": 1.9832822644447516e-05, "loss": 3.0142, "step": 1922500 }, { "epoch": 12.07, "learning_rate": 1.98249739423089e-05, "loss": 3.0141, "step": 1923000 }, { "epoch": 12.08, "learning_rate": 1.9817125240170285e-05, "loss": 3.0172, "step": 1923500 }, { "epoch": 12.08, "learning_rate": 1.980927653803167e-05, "loss": 3.0184, "step": 1924000 }, { "epoch": 12.08, "learning_rate": 1.9801427835893058e-05, "loss": 3.02, "step": 1924500 }, { "epoch": 12.09, "learning_rate": 1.9793579133754443e-05, "loss": 3.0156, "step": 1925000 }, { "epoch": 12.09, "learning_rate": 1.9785746129020104e-05, "loss": 3.0184, "step": 1925500 }, { "epoch": 12.09, "learning_rate": 1.9777897426881493e-05, "loss": 3.0188, "step": 1926000 }, { "epoch": 12.1, "learning_rate": 1.9770048724742877e-05, "loss": 3.0279, "step": 1926500 }, { "epoch": 12.1, "learning_rate": 1.9762200022604262e-05, "loss": 3.0167, "step": 1927000 }, { "epoch": 12.1, "learning_rate": 1.9754367017869927e-05, "loss": 3.0199, "step": 1927500 }, { "epoch": 12.11, "learning_rate": 1.974651831573131e-05, "loss": 3.0268, "step": 1928000 }, { "epoch": 12.11, "learning_rate": 1.9738669613592696e-05, "loss": 3.0228, "step": 1928500 }, { "epoch": 12.11, "learning_rate": 1.973082091145408e-05, "loss": 3.0186, "step": 1929000 }, { "epoch": 12.12, "learning_rate": 1.972297220931547e-05, "loss": 3.0182, "step": 1929500 }, { "epoch": 12.12, "learning_rate": 1.9715123507176854e-05, "loss": 3.0172, "step": 1930000 }, { "epoch": 12.12, "learning_rate": 1.970727480503824e-05, "loss": 3.0158, "step": 1930500 }, { "epoch": 12.12, "learning_rate": 1.9699426102899627e-05, "loss": 3.0196, "step": 1931000 }, { "epoch": 12.13, "learning_rate": 1.9691593098165288e-05, "loss": 3.0196, "step": 1931500 }, { "epoch": 12.13, "learning_rate": 1.9683744396026673e-05, "loss": 3.0212, "step": 1932000 }, { "epoch": 12.13, "learning_rate": 1.967589569388806e-05, "loss": 3.0159, "step": 1932500 }, { "epoch": 12.14, "learning_rate": 1.9668046991749446e-05, "loss": 3.0234, "step": 1933000 }, { "epoch": 12.14, "learning_rate": 1.9660213987015107e-05, "loss": 3.0132, "step": 1933500 }, { "epoch": 12.14, "learning_rate": 1.9652365284876495e-05, "loss": 3.0181, "step": 1934000 }, { "epoch": 12.15, "learning_rate": 1.964451658273788e-05, "loss": 3.0092, "step": 1934500 }, { "epoch": 12.15, "learning_rate": 1.9636667880599265e-05, "loss": 3.018, "step": 1935000 }, { "epoch": 12.15, "learning_rate": 1.962881917846065e-05, "loss": 3.0254, "step": 1935500 }, { "epoch": 12.16, "learning_rate": 1.9620970476322038e-05, "loss": 3.017, "step": 1936000 }, { "epoch": 12.16, "learning_rate": 1.9613121774183422e-05, "loss": 3.0159, "step": 1936500 }, { "epoch": 12.16, "learning_rate": 1.9605273072044807e-05, "loss": 3.0179, "step": 1937000 }, { "epoch": 12.17, "learning_rate": 1.9597424369906192e-05, "loss": 3.0188, "step": 1937500 }, { "epoch": 12.17, "learning_rate": 1.958957566776758e-05, "loss": 3.0269, "step": 1938000 }, { "epoch": 12.17, "learning_rate": 1.9581726965628965e-05, "loss": 3.0197, "step": 1938500 }, { "epoch": 12.17, "learning_rate": 1.9573909658298906e-05, "loss": 3.0224, "step": 1939000 }, { "epoch": 12.18, "learning_rate": 1.956606095616029e-05, "loss": 3.0157, "step": 1939500 }, { "epoch": 12.18, "learning_rate": 1.9558212254021676e-05, "loss": 3.0244, "step": 1940000 }, { "epoch": 12.18, "learning_rate": 1.955036355188306e-05, "loss": 3.0197, "step": 1940500 }, { "epoch": 12.19, "learning_rate": 1.954251484974445e-05, "loss": 3.0157, "step": 1941000 }, { "epoch": 12.19, "learning_rate": 1.9534697542414386e-05, "loss": 3.013, "step": 1941500 }, { "epoch": 12.19, "learning_rate": 1.952684884027577e-05, "loss": 3.0232, "step": 1942000 }, { "epoch": 12.2, "learning_rate": 1.951900013813716e-05, "loss": 3.0133, "step": 1942500 }, { "epoch": 12.2, "learning_rate": 1.9511151435998544e-05, "loss": 3.0154, "step": 1943000 }, { "epoch": 12.2, "learning_rate": 1.950330273385993e-05, "loss": 3.0182, "step": 1943500 }, { "epoch": 12.21, "learning_rate": 1.9495454031721317e-05, "loss": 3.0252, "step": 1944000 }, { "epoch": 12.21, "learning_rate": 1.94876053295827e-05, "loss": 3.0179, "step": 1944500 }, { "epoch": 12.21, "learning_rate": 1.9479756627444086e-05, "loss": 3.0166, "step": 1945000 }, { "epoch": 12.22, "learning_rate": 1.947190792530547e-05, "loss": 3.0284, "step": 1945500 }, { "epoch": 12.22, "learning_rate": 1.946405922316686e-05, "loss": 3.0276, "step": 1946000 }, { "epoch": 12.22, "learning_rate": 1.9456210521028244e-05, "loss": 3.0187, "step": 1946500 }, { "epoch": 12.23, "learning_rate": 1.944836181888963e-05, "loss": 3.0181, "step": 1947000 }, { "epoch": 12.23, "learning_rate": 1.9440528814155293e-05, "loss": 3.0217, "step": 1947500 }, { "epoch": 12.23, "learning_rate": 1.9432680112016678e-05, "loss": 3.0161, "step": 1948000 }, { "epoch": 12.23, "learning_rate": 1.9424831409878063e-05, "loss": 3.0249, "step": 1948500 }, { "epoch": 12.24, "learning_rate": 1.9416982707739448e-05, "loss": 3.0183, "step": 1949000 }, { "epoch": 12.24, "learning_rate": 1.9409149703005112e-05, "loss": 3.0182, "step": 1949500 }, { "epoch": 12.24, "learning_rate": 1.9401301000866497e-05, "loss": 3.0187, "step": 1950000 }, { "epoch": 12.24, "eval_accuracy": 0.45576093240394727, "eval_loss": 2.9470033645629883, "eval_runtime": 1451.121, "eval_samples_per_second": 57.749, "eval_steps_per_second": 5.776, "step": 1950000 }, { "epoch": 12.25, "learning_rate": 1.9393452298727882e-05, "loss": 3.0174, "step": 1950500 }, { "epoch": 12.25, "learning_rate": 1.938560359658927e-05, "loss": 3.0261, "step": 1951000 }, { "epoch": 12.25, "learning_rate": 1.9377754894450655e-05, "loss": 3.0273, "step": 1951500 }, { "epoch": 12.26, "learning_rate": 1.936990619231204e-05, "loss": 3.015, "step": 1952000 }, { "epoch": 12.26, "learning_rate": 1.9362057490173428e-05, "loss": 3.0118, "step": 1952500 }, { "epoch": 12.26, "learning_rate": 1.9354208788034812e-05, "loss": 3.0151, "step": 1953000 }, { "epoch": 12.27, "learning_rate": 1.9346360085896197e-05, "loss": 3.0165, "step": 1953500 }, { "epoch": 12.27, "learning_rate": 1.9338527081161862e-05, "loss": 3.026, "step": 1954000 }, { "epoch": 12.27, "learning_rate": 1.9330678379023247e-05, "loss": 3.0266, "step": 1954500 }, { "epoch": 12.28, "learning_rate": 1.932282967688463e-05, "loss": 3.022, "step": 1955000 }, { "epoch": 12.28, "learning_rate": 1.9314980974746016e-05, "loss": 3.0231, "step": 1955500 }, { "epoch": 12.28, "learning_rate": 1.9307132272607404e-05, "loss": 3.0125, "step": 1956000 }, { "epoch": 12.28, "learning_rate": 1.9299299267873066e-05, "loss": 3.0202, "step": 1956500 }, { "epoch": 12.29, "learning_rate": 1.929145056573445e-05, "loss": 3.0217, "step": 1957000 }, { "epoch": 12.29, "learning_rate": 1.928360186359584e-05, "loss": 3.0136, "step": 1957500 }, { "epoch": 12.29, "learning_rate": 1.92757688588615e-05, "loss": 3.0276, "step": 1958000 }, { "epoch": 12.3, "learning_rate": 1.9267920156722885e-05, "loss": 3.0252, "step": 1958500 }, { "epoch": 12.3, "learning_rate": 1.9260071454584273e-05, "loss": 3.0275, "step": 1959000 }, { "epoch": 12.3, "learning_rate": 1.9252222752445658e-05, "loss": 3.0224, "step": 1959500 }, { "epoch": 12.31, "learning_rate": 1.9244374050307042e-05, "loss": 3.0225, "step": 1960000 }, { "epoch": 12.31, "learning_rate": 1.9236525348168427e-05, "loss": 3.0257, "step": 1960500 }, { "epoch": 12.31, "learning_rate": 1.9228676646029815e-05, "loss": 3.0196, "step": 1961000 }, { "epoch": 12.32, "learning_rate": 1.9220827943891196e-05, "loss": 3.0252, "step": 1961500 }, { "epoch": 12.32, "learning_rate": 1.9212979241752585e-05, "loss": 3.0156, "step": 1962000 }, { "epoch": 12.32, "learning_rate": 1.920514623701825e-05, "loss": 3.0213, "step": 1962500 }, { "epoch": 12.33, "learning_rate": 1.9197297534879634e-05, "loss": 3.0157, "step": 1963000 }, { "epoch": 12.33, "learning_rate": 1.918944883274102e-05, "loss": 3.0202, "step": 1963500 }, { "epoch": 12.33, "learning_rate": 1.9181600130602404e-05, "loss": 3.0231, "step": 1964000 }, { "epoch": 12.34, "learning_rate": 1.917376712586807e-05, "loss": 3.0259, "step": 1964500 }, { "epoch": 12.34, "learning_rate": 1.916593412113373e-05, "loss": 3.0187, "step": 1965000 }, { "epoch": 12.34, "learning_rate": 1.9158085418995118e-05, "loss": 3.0259, "step": 1965500 }, { "epoch": 12.34, "learning_rate": 1.9150236716856503e-05, "loss": 3.0285, "step": 1966000 }, { "epoch": 12.35, "learning_rate": 1.9142388014717887e-05, "loss": 3.0183, "step": 1966500 }, { "epoch": 12.35, "learning_rate": 1.9134539312579272e-05, "loss": 3.0231, "step": 1967000 }, { "epoch": 12.35, "learning_rate": 1.912669061044066e-05, "loss": 3.0219, "step": 1967500 }, { "epoch": 12.36, "learning_rate": 1.911885760570632e-05, "loss": 3.0192, "step": 1968000 }, { "epoch": 12.36, "learning_rate": 1.9111008903567706e-05, "loss": 3.0212, "step": 1968500 }, { "epoch": 12.36, "learning_rate": 1.9103160201429094e-05, "loss": 3.013, "step": 1969000 }, { "epoch": 12.37, "learning_rate": 1.9095311499290476e-05, "loss": 3.0235, "step": 1969500 }, { "epoch": 12.37, "learning_rate": 1.9087462797151864e-05, "loss": 3.0256, "step": 1970000 }, { "epoch": 12.37, "learning_rate": 1.907961409501325e-05, "loss": 3.0235, "step": 1970500 }, { "epoch": 12.38, "learning_rate": 1.9071765392874637e-05, "loss": 3.0212, "step": 1971000 }, { "epoch": 12.38, "learning_rate": 1.9063916690736018e-05, "loss": 3.0195, "step": 1971500 }, { "epoch": 12.38, "learning_rate": 1.9056067988597406e-05, "loss": 3.0195, "step": 1972000 }, { "epoch": 12.39, "learning_rate": 1.9048219286458794e-05, "loss": 3.0208, "step": 1972500 }, { "epoch": 12.39, "learning_rate": 1.9040370584320176e-05, "loss": 3.0197, "step": 1973000 }, { "epoch": 12.39, "learning_rate": 1.9032521882181564e-05, "loss": 3.0219, "step": 1973500 }, { "epoch": 12.39, "learning_rate": 1.902468887744723e-05, "loss": 3.0226, "step": 1974000 }, { "epoch": 12.4, "learning_rate": 1.9016840175308613e-05, "loss": 3.021, "step": 1974500 }, { "epoch": 12.4, "learning_rate": 1.9009007170574275e-05, "loss": 3.0152, "step": 1975000 }, { "epoch": 12.4, "learning_rate": 1.900115846843566e-05, "loss": 3.0179, "step": 1975500 }, { "epoch": 12.41, "learning_rate": 1.899332546370132e-05, "loss": 3.0265, "step": 1976000 }, { "epoch": 12.41, "learning_rate": 1.898547676156271e-05, "loss": 3.0232, "step": 1976500 }, { "epoch": 12.41, "learning_rate": 1.8977628059424094e-05, "loss": 3.0145, "step": 1977000 }, { "epoch": 12.42, "learning_rate": 1.8969779357285482e-05, "loss": 3.0214, "step": 1977500 }, { "epoch": 12.42, "learning_rate": 1.8961930655146863e-05, "loss": 3.0204, "step": 1978000 }, { "epoch": 12.42, "learning_rate": 1.895408195300825e-05, "loss": 3.0256, "step": 1978500 }, { "epoch": 12.43, "learning_rate": 1.894623325086964e-05, "loss": 3.0175, "step": 1979000 }, { "epoch": 12.43, "learning_rate": 1.893838454873102e-05, "loss": 3.0196, "step": 1979500 }, { "epoch": 12.43, "learning_rate": 1.893053584659241e-05, "loss": 3.0189, "step": 1980000 }, { "epoch": 12.43, "eval_accuracy": 0.45600211262488755, "eval_loss": 2.946021795272827, "eval_runtime": 1451.2836, "eval_samples_per_second": 57.743, "eval_steps_per_second": 5.775, "step": 1980000 }, { "epoch": 12.44, "learning_rate": 1.8922687144453794e-05, "loss": 3.0182, "step": 1980500 }, { "epoch": 12.44, "learning_rate": 1.8914854139719455e-05, "loss": 3.0229, "step": 1981000 }, { "epoch": 12.44, "learning_rate": 1.8907005437580843e-05, "loss": 3.0259, "step": 1981500 }, { "epoch": 12.44, "learning_rate": 1.8899156735442228e-05, "loss": 3.022, "step": 1982000 }, { "epoch": 12.45, "learning_rate": 1.8891308033303616e-05, "loss": 3.0158, "step": 1982500 }, { "epoch": 12.45, "learning_rate": 1.8883459331164997e-05, "loss": 3.0195, "step": 1983000 }, { "epoch": 12.45, "learning_rate": 1.8875626326430662e-05, "loss": 3.0233, "step": 1983500 }, { "epoch": 12.46, "learning_rate": 1.886777762429205e-05, "loss": 3.018, "step": 1984000 }, { "epoch": 12.46, "learning_rate": 1.885992892215343e-05, "loss": 3.0155, "step": 1984500 }, { "epoch": 12.46, "learning_rate": 1.885208022001482e-05, "loss": 3.0264, "step": 1985000 }, { "epoch": 12.47, "learning_rate": 1.8844231517876205e-05, "loss": 3.0229, "step": 1985500 }, { "epoch": 12.47, "learning_rate": 1.8836398513141866e-05, "loss": 3.0242, "step": 1986000 }, { "epoch": 12.47, "learning_rate": 1.882856550840753e-05, "loss": 3.0243, "step": 1986500 }, { "epoch": 12.48, "learning_rate": 1.882071680626892e-05, "loss": 3.029, "step": 1987000 }, { "epoch": 12.48, "learning_rate": 1.88128681041303e-05, "loss": 3.0225, "step": 1987500 }, { "epoch": 12.48, "learning_rate": 1.8805019401991688e-05, "loss": 3.0207, "step": 1988000 }, { "epoch": 12.49, "learning_rate": 1.8797170699853073e-05, "loss": 3.0193, "step": 1988500 }, { "epoch": 12.49, "learning_rate": 1.878932199771446e-05, "loss": 3.0214, "step": 1989000 }, { "epoch": 12.49, "learning_rate": 1.8781473295575843e-05, "loss": 3.0237, "step": 1989500 }, { "epoch": 12.5, "learning_rate": 1.877362459343723e-05, "loss": 3.0181, "step": 1990000 }, { "epoch": 12.5, "learning_rate": 1.8765791588702895e-05, "loss": 3.0205, "step": 1990500 }, { "epoch": 12.5, "learning_rate": 1.8757942886564277e-05, "loss": 3.0208, "step": 1991000 }, { "epoch": 12.5, "learning_rate": 1.8750094184425665e-05, "loss": 3.0219, "step": 1991500 }, { "epoch": 12.51, "learning_rate": 1.874226117969133e-05, "loss": 3.0234, "step": 1992000 }, { "epoch": 12.51, "learning_rate": 1.873441247755271e-05, "loss": 3.0187, "step": 1992500 }, { "epoch": 12.51, "learning_rate": 1.87265637754141e-05, "loss": 3.0208, "step": 1993000 }, { "epoch": 12.52, "learning_rate": 1.8718715073275484e-05, "loss": 3.0186, "step": 1993500 }, { "epoch": 12.52, "learning_rate": 1.8710866371136872e-05, "loss": 3.0245, "step": 1994000 }, { "epoch": 12.52, "learning_rate": 1.8703017668998253e-05, "loss": 3.0282, "step": 1994500 }, { "epoch": 12.53, "learning_rate": 1.869516896685964e-05, "loss": 3.0226, "step": 1995000 }, { "epoch": 12.53, "learning_rate": 1.8687320264721026e-05, "loss": 3.0224, "step": 1995500 }, { "epoch": 12.53, "learning_rate": 1.867947156258241e-05, "loss": 3.0136, "step": 1996000 }, { "epoch": 12.54, "learning_rate": 1.86716228604438e-05, "loss": 3.0172, "step": 1996500 }, { "epoch": 12.54, "learning_rate": 1.866378985570946e-05, "loss": 3.0208, "step": 1997000 }, { "epoch": 12.54, "learning_rate": 1.8655941153570845e-05, "loss": 3.0173, "step": 1997500 }, { "epoch": 12.55, "learning_rate": 1.864809245143223e-05, "loss": 3.0183, "step": 1998000 }, { "epoch": 12.55, "learning_rate": 1.8640243749293618e-05, "loss": 3.023, "step": 1998500 }, { "epoch": 12.55, "learning_rate": 1.8632395047155006e-05, "loss": 3.0265, "step": 1999000 }, { "epoch": 12.55, "learning_rate": 1.8624562042420664e-05, "loss": 3.0175, "step": 1999500 }, { "epoch": 12.56, "learning_rate": 1.861672903768633e-05, "loss": 3.0197, "step": 2000000 }, { "epoch": 12.56, "learning_rate": 1.8608880335547714e-05, "loss": 3.0229, "step": 2000500 }, { "epoch": 12.56, "learning_rate": 1.86010316334091e-05, "loss": 3.0203, "step": 2001000 }, { "epoch": 12.57, "learning_rate": 1.8593182931270487e-05, "loss": 3.0198, "step": 2001500 }, { "epoch": 12.57, "learning_rate": 1.858533422913187e-05, "loss": 3.0265, "step": 2002000 }, { "epoch": 12.57, "learning_rate": 1.8577501224397533e-05, "loss": 3.0265, "step": 2002500 }, { "epoch": 12.58, "learning_rate": 1.856965252225892e-05, "loss": 3.0306, "step": 2003000 }, { "epoch": 12.58, "learning_rate": 1.8561803820120306e-05, "loss": 3.0151, "step": 2003500 }, { "epoch": 12.58, "learning_rate": 1.855395511798169e-05, "loss": 3.0159, "step": 2004000 }, { "epoch": 12.59, "learning_rate": 1.8546106415843075e-05, "loss": 3.0259, "step": 2004500 }, { "epoch": 12.59, "learning_rate": 1.8538257713704463e-05, "loss": 3.0193, "step": 2005000 }, { "epoch": 12.59, "learning_rate": 1.853040901156585e-05, "loss": 3.0219, "step": 2005500 }, { "epoch": 12.6, "learning_rate": 1.8522560309427233e-05, "loss": 3.0171, "step": 2006000 }, { "epoch": 12.6, "learning_rate": 1.851471160728862e-05, "loss": 3.0255, "step": 2006500 }, { "epoch": 12.6, "learning_rate": 1.8506862905150006e-05, "loss": 3.0207, "step": 2007000 }, { "epoch": 12.61, "learning_rate": 1.849901420301139e-05, "loss": 3.0198, "step": 2007500 }, { "epoch": 12.61, "learning_rate": 1.8491165500872775e-05, "loss": 3.0286, "step": 2008000 }, { "epoch": 12.61, "learning_rate": 1.848333249613844e-05, "loss": 3.02, "step": 2008500 }, { "epoch": 12.61, "learning_rate": 1.8475483793999825e-05, "loss": 3.0171, "step": 2009000 }, { "epoch": 12.62, "learning_rate": 1.846765078926549e-05, "loss": 3.0205, "step": 2009500 }, { "epoch": 12.62, "learning_rate": 1.8459802087126874e-05, "loss": 3.0228, "step": 2010000 }, { "epoch": 12.62, "eval_accuracy": 0.4561948958281899, "eval_loss": 2.944429874420166, "eval_runtime": 1449.7407, "eval_samples_per_second": 57.804, "eval_steps_per_second": 5.781, "step": 2010000 }, { "epoch": 12.62, "learning_rate": 1.845195338498826e-05, "loss": 3.0207, "step": 2010500 }, { "epoch": 12.63, "learning_rate": 1.8444104682849643e-05, "loss": 3.0243, "step": 2011000 }, { "epoch": 12.63, "learning_rate": 1.843625598071103e-05, "loss": 3.0245, "step": 2011500 }, { "epoch": 12.63, "learning_rate": 1.8428407278572416e-05, "loss": 3.0134, "step": 2012000 }, { "epoch": 12.64, "learning_rate": 1.84205585764338e-05, "loss": 3.0168, "step": 2012500 }, { "epoch": 12.64, "learning_rate": 1.8412709874295186e-05, "loss": 3.022, "step": 2013000 }, { "epoch": 12.64, "learning_rate": 1.8404861172156574e-05, "loss": 3.0174, "step": 2013500 }, { "epoch": 12.65, "learning_rate": 1.8397028167422235e-05, "loss": 3.0213, "step": 2014000 }, { "epoch": 12.65, "learning_rate": 1.838917946528362e-05, "loss": 3.0247, "step": 2014500 }, { "epoch": 12.65, "learning_rate": 1.8381330763145008e-05, "loss": 3.0297, "step": 2015000 }, { "epoch": 12.66, "learning_rate": 1.8373482061006393e-05, "loss": 3.0214, "step": 2015500 }, { "epoch": 12.66, "learning_rate": 1.8365633358867778e-05, "loss": 3.0175, "step": 2016000 }, { "epoch": 12.66, "learning_rate": 1.8357800354133442e-05, "loss": 3.0242, "step": 2016500 }, { "epoch": 12.66, "learning_rate": 1.8349951651994827e-05, "loss": 3.0166, "step": 2017000 }, { "epoch": 12.67, "learning_rate": 1.8342102949856212e-05, "loss": 3.03, "step": 2017500 }, { "epoch": 12.67, "learning_rate": 1.8334254247717597e-05, "loss": 3.0222, "step": 2018000 }, { "epoch": 12.67, "learning_rate": 1.8326405545578985e-05, "loss": 3.0227, "step": 2018500 }, { "epoch": 12.68, "learning_rate": 1.8318572540844646e-05, "loss": 3.0184, "step": 2019000 }, { "epoch": 12.68, "learning_rate": 1.831072383870603e-05, "loss": 3.0232, "step": 2019500 }, { "epoch": 12.68, "learning_rate": 1.830287513656742e-05, "loss": 3.0235, "step": 2020000 }, { "epoch": 12.69, "learning_rate": 1.8295026434428804e-05, "loss": 3.0165, "step": 2020500 }, { "epoch": 12.69, "learning_rate": 1.828717773229019e-05, "loss": 3.0241, "step": 2021000 }, { "epoch": 12.69, "learning_rate": 1.8279344727555853e-05, "loss": 3.0192, "step": 2021500 }, { "epoch": 12.7, "learning_rate": 1.8271496025417238e-05, "loss": 3.0234, "step": 2022000 }, { "epoch": 12.7, "learning_rate": 1.8263647323278623e-05, "loss": 3.0271, "step": 2022500 }, { "epoch": 12.7, "learning_rate": 1.8255798621140008e-05, "loss": 3.0189, "step": 2023000 }, { "epoch": 12.71, "learning_rate": 1.8247949919001396e-05, "loss": 3.0187, "step": 2023500 }, { "epoch": 12.71, "learning_rate": 1.824010121686278e-05, "loss": 3.0185, "step": 2024000 }, { "epoch": 12.71, "learning_rate": 1.8232252514724165e-05, "loss": 3.0206, "step": 2024500 }, { "epoch": 12.71, "learning_rate": 1.8224403812585553e-05, "loss": 3.0219, "step": 2025000 }, { "epoch": 12.72, "learning_rate": 1.8216570807851215e-05, "loss": 3.0219, "step": 2025500 }, { "epoch": 12.72, "learning_rate": 1.8208737803116876e-05, "loss": 3.0171, "step": 2026000 }, { "epoch": 12.72, "learning_rate": 1.8200889100978264e-05, "loss": 3.0214, "step": 2026500 }, { "epoch": 12.73, "learning_rate": 1.819304039883965e-05, "loss": 3.0206, "step": 2027000 }, { "epoch": 12.73, "learning_rate": 1.8185191696701034e-05, "loss": 3.0248, "step": 2027500 }, { "epoch": 12.73, "learning_rate": 1.8177342994562422e-05, "loss": 3.0273, "step": 2028000 }, { "epoch": 12.74, "learning_rate": 1.8169509989828083e-05, "loss": 3.0214, "step": 2028500 }, { "epoch": 12.74, "learning_rate": 1.8161661287689468e-05, "loss": 3.0212, "step": 2029000 }, { "epoch": 12.74, "learning_rate": 1.8153828282955133e-05, "loss": 3.0222, "step": 2029500 }, { "epoch": 12.75, "learning_rate": 1.8145979580816517e-05, "loss": 3.031, "step": 2030000 }, { "epoch": 12.75, "learning_rate": 1.8138130878677902e-05, "loss": 3.0129, "step": 2030500 }, { "epoch": 12.75, "learning_rate": 1.8130282176539287e-05, "loss": 3.0255, "step": 2031000 }, { "epoch": 12.76, "learning_rate": 1.8122433474400675e-05, "loss": 3.0247, "step": 2031500 }, { "epoch": 12.76, "learning_rate": 1.811458477226206e-05, "loss": 3.0189, "step": 2032000 }, { "epoch": 12.76, "learning_rate": 1.8106736070123444e-05, "loss": 3.0249, "step": 2032500 }, { "epoch": 12.77, "learning_rate": 1.8098887367984833e-05, "loss": 3.0291, "step": 2033000 }, { "epoch": 12.77, "learning_rate": 1.8091038665846217e-05, "loss": 3.0213, "step": 2033500 }, { "epoch": 12.77, "learning_rate": 1.8083189963707602e-05, "loss": 3.022, "step": 2034000 }, { "epoch": 12.77, "learning_rate": 1.8075341261568987e-05, "loss": 3.0191, "step": 2034500 }, { "epoch": 12.78, "learning_rate": 1.806750825683465e-05, "loss": 3.0213, "step": 2035000 }, { "epoch": 12.78, "learning_rate": 1.8059659554696036e-05, "loss": 3.0209, "step": 2035500 }, { "epoch": 12.78, "learning_rate": 1.805181085255742e-05, "loss": 3.0218, "step": 2036000 }, { "epoch": 12.79, "learning_rate": 1.804396215041881e-05, "loss": 3.0243, "step": 2036500 }, { "epoch": 12.79, "learning_rate": 1.8036113448280194e-05, "loss": 3.0142, "step": 2037000 }, { "epoch": 12.79, "learning_rate": 1.8028280443545855e-05, "loss": 3.0212, "step": 2037500 }, { "epoch": 12.8, "learning_rate": 1.8020431741407243e-05, "loss": 3.0199, "step": 2038000 }, { "epoch": 12.8, "learning_rate": 1.8012583039268628e-05, "loss": 3.0159, "step": 2038500 }, { "epoch": 12.8, "learning_rate": 1.8004734337130013e-05, "loss": 3.0286, "step": 2039000 }, { "epoch": 12.81, "learning_rate": 1.7996885634991398e-05, "loss": 3.0234, "step": 2039500 }, { "epoch": 12.81, "learning_rate": 1.7989036932852786e-05, "loss": 3.0243, "step": 2040000 }, { "epoch": 12.81, "eval_accuracy": 0.45648273502009945, "eval_loss": 2.94172739982605, "eval_runtime": 1449.5237, "eval_samples_per_second": 57.813, "eval_steps_per_second": 5.782, "step": 2040000 }, { "epoch": 12.81, "learning_rate": 1.7981188230714167e-05, "loss": 3.0271, "step": 2040500 }, { "epoch": 12.82, "learning_rate": 1.7973355225979832e-05, "loss": 3.0178, "step": 2041000 }, { "epoch": 12.82, "learning_rate": 1.796550652384122e-05, "loss": 3.0214, "step": 2041500 }, { "epoch": 12.82, "learning_rate": 1.7957657821702605e-05, "loss": 3.0235, "step": 2042000 }, { "epoch": 12.82, "learning_rate": 1.794980911956399e-05, "loss": 3.0231, "step": 2042500 }, { "epoch": 12.83, "learning_rate": 1.7941960417425374e-05, "loss": 3.0186, "step": 2043000 }, { "epoch": 12.83, "learning_rate": 1.7934111715286762e-05, "loss": 3.0186, "step": 2043500 }, { "epoch": 12.83, "learning_rate": 1.7926278710552424e-05, "loss": 3.0216, "step": 2044000 }, { "epoch": 12.84, "learning_rate": 1.791843000841381e-05, "loss": 3.0194, "step": 2044500 }, { "epoch": 12.84, "learning_rate": 1.7910581306275197e-05, "loss": 3.0234, "step": 2045000 }, { "epoch": 12.84, "learning_rate": 1.7902748301540858e-05, "loss": 3.025, "step": 2045500 }, { "epoch": 12.85, "learning_rate": 1.7894899599402243e-05, "loss": 3.028, "step": 2046000 }, { "epoch": 12.85, "learning_rate": 1.788705089726363e-05, "loss": 3.0154, "step": 2046500 }, { "epoch": 12.85, "learning_rate": 1.7879202195125012e-05, "loss": 3.0229, "step": 2047000 }, { "epoch": 12.86, "learning_rate": 1.78713534929864e-05, "loss": 3.0215, "step": 2047500 }, { "epoch": 12.86, "learning_rate": 1.786350479084779e-05, "loss": 3.0278, "step": 2048000 }, { "epoch": 12.86, "learning_rate": 1.7855656088709173e-05, "loss": 3.0269, "step": 2048500 }, { "epoch": 12.87, "learning_rate": 1.7847807386570558e-05, "loss": 3.0198, "step": 2049000 }, { "epoch": 12.87, "learning_rate": 1.7839958684431943e-05, "loss": 3.0144, "step": 2049500 }, { "epoch": 12.87, "learning_rate": 1.783210998229333e-05, "loss": 3.0213, "step": 2050000 }, { "epoch": 12.88, "learning_rate": 1.7824261280154712e-05, "loss": 3.0153, "step": 2050500 }, { "epoch": 12.88, "learning_rate": 1.78164125780161e-05, "loss": 3.0193, "step": 2051000 }, { "epoch": 12.88, "learning_rate": 1.780859527068604e-05, "loss": 3.0254, "step": 2051500 }, { "epoch": 12.88, "learning_rate": 1.7800746568547423e-05, "loss": 3.0242, "step": 2052000 }, { "epoch": 12.89, "learning_rate": 1.779289786640881e-05, "loss": 3.0243, "step": 2052500 }, { "epoch": 12.89, "learning_rate": 1.77850491642702e-05, "loss": 3.0142, "step": 2053000 }, { "epoch": 12.89, "learning_rate": 1.7777216159535857e-05, "loss": 3.0242, "step": 2053500 }, { "epoch": 12.9, "learning_rate": 1.7769367457397245e-05, "loss": 3.0078, "step": 2054000 }, { "epoch": 12.9, "learning_rate": 1.7761518755258634e-05, "loss": 3.0156, "step": 2054500 }, { "epoch": 12.9, "learning_rate": 1.7753670053120018e-05, "loss": 3.0185, "step": 2055000 }, { "epoch": 12.91, "learning_rate": 1.7745821350981403e-05, "loss": 3.0189, "step": 2055500 }, { "epoch": 12.91, "learning_rate": 1.7737972648842788e-05, "loss": 3.0248, "step": 2056000 }, { "epoch": 12.91, "learning_rate": 1.7730123946704176e-05, "loss": 3.0252, "step": 2056500 }, { "epoch": 12.92, "learning_rate": 1.7722275244565557e-05, "loss": 3.02, "step": 2057000 }, { "epoch": 12.92, "learning_rate": 1.7714426542426945e-05, "loss": 3.0211, "step": 2057500 }, { "epoch": 12.92, "learning_rate": 1.770657784028833e-05, "loss": 3.0207, "step": 2058000 }, { "epoch": 12.93, "learning_rate": 1.7698729138149718e-05, "loss": 3.0228, "step": 2058500 }, { "epoch": 12.93, "learning_rate": 1.76908804360111e-05, "loss": 3.013, "step": 2059000 }, { "epoch": 12.93, "learning_rate": 1.7683047431276764e-05, "loss": 3.0198, "step": 2059500 }, { "epoch": 12.93, "learning_rate": 1.7675198729138152e-05, "loss": 3.0257, "step": 2060000 }, { "epoch": 12.94, "learning_rate": 1.7667365724403814e-05, "loss": 3.0176, "step": 2060500 }, { "epoch": 12.94, "learning_rate": 1.76595170222652e-05, "loss": 3.0139, "step": 2061000 }, { "epoch": 12.94, "learning_rate": 1.7651668320126587e-05, "loss": 3.0176, "step": 2061500 }, { "epoch": 12.95, "learning_rate": 1.7643835315392248e-05, "loss": 3.0283, "step": 2062000 }, { "epoch": 12.95, "learning_rate": 1.7635986613253633e-05, "loss": 3.0215, "step": 2062500 }, { "epoch": 12.95, "learning_rate": 1.762813791111502e-05, "loss": 3.0246, "step": 2063000 }, { "epoch": 12.96, "learning_rate": 1.7620304906380682e-05, "loss": 3.0159, "step": 2063500 }, { "epoch": 12.96, "learning_rate": 1.7612456204242067e-05, "loss": 3.024, "step": 2064000 }, { "epoch": 12.96, "learning_rate": 1.7604607502103455e-05, "loss": 3.0134, "step": 2064500 }, { "epoch": 12.97, "learning_rate": 1.7596758799964837e-05, "loss": 3.0326, "step": 2065000 }, { "epoch": 12.97, "learning_rate": 1.7588910097826225e-05, "loss": 3.0237, "step": 2065500 }, { "epoch": 12.97, "learning_rate": 1.758106139568761e-05, "loss": 3.0259, "step": 2066000 }, { "epoch": 12.98, "learning_rate": 1.7573212693548998e-05, "loss": 3.0143, "step": 2066500 }, { "epoch": 12.98, "learning_rate": 1.756536399141038e-05, "loss": 3.02, "step": 2067000 }, { "epoch": 12.98, "learning_rate": 1.7557515289271767e-05, "loss": 3.0164, "step": 2067500 }, { "epoch": 12.98, "learning_rate": 1.7549682284537432e-05, "loss": 3.0225, "step": 2068000 }, { "epoch": 12.99, "learning_rate": 1.7541833582398813e-05, "loss": 3.0239, "step": 2068500 }, { "epoch": 12.99, "learning_rate": 1.75339848802602e-05, "loss": 3.0183, "step": 2069000 }, { "epoch": 12.99, "learning_rate": 1.7526136178121586e-05, "loss": 3.0194, "step": 2069500 }, { "epoch": 13.0, "learning_rate": 1.751828747598297e-05, "loss": 3.0229, "step": 2070000 }, { "epoch": 13.0, "eval_accuracy": 0.45672158229249127, "eval_loss": 2.939502000808716, "eval_runtime": 1449.8428, "eval_samples_per_second": 57.8, "eval_steps_per_second": 5.781, "step": 2070000 }, { "epoch": 13.0, "learning_rate": 1.751043877384436e-05, "loss": 3.0129, "step": 2070500 }, { "epoch": 13.0, "learning_rate": 1.7502590071705744e-05, "loss": 3.0015, "step": 2071000 }, { "epoch": 13.01, "learning_rate": 1.7494741369567132e-05, "loss": 3.0099, "step": 2071500 }, { "epoch": 13.01, "learning_rate": 1.7486892667428513e-05, "loss": 3.0065, "step": 2072000 }, { "epoch": 13.01, "learning_rate": 1.74790439652899e-05, "loss": 3.0002, "step": 2072500 }, { "epoch": 13.02, "learning_rate": 1.7471195263151286e-05, "loss": 3.0035, "step": 2073000 }, { "epoch": 13.02, "learning_rate": 1.746334656101267e-05, "loss": 3.0004, "step": 2073500 }, { "epoch": 13.02, "learning_rate": 1.7455497858874055e-05, "loss": 3.0065, "step": 2074000 }, { "epoch": 13.03, "learning_rate": 1.744766485413972e-05, "loss": 3.0065, "step": 2074500 }, { "epoch": 13.03, "learning_rate": 1.7439816152001105e-05, "loss": 3.0054, "step": 2075000 }, { "epoch": 13.03, "learning_rate": 1.743196744986249e-05, "loss": 3.0098, "step": 2075500 }, { "epoch": 13.04, "learning_rate": 1.7424134445128154e-05, "loss": 3.0079, "step": 2076000 }, { "epoch": 13.04, "learning_rate": 1.7416285742989543e-05, "loss": 3.0083, "step": 2076500 }, { "epoch": 13.04, "learning_rate": 1.7408452738255204e-05, "loss": 3.0059, "step": 2077000 }, { "epoch": 13.04, "learning_rate": 1.740060403611659e-05, "loss": 3.0093, "step": 2077500 }, { "epoch": 13.05, "learning_rate": 1.7392755333977977e-05, "loss": 2.9999, "step": 2078000 }, { "epoch": 13.05, "learning_rate": 1.7384906631839358e-05, "loss": 3.0121, "step": 2078500 }, { "epoch": 13.05, "learning_rate": 1.7377057929700746e-05, "loss": 3.0056, "step": 2079000 }, { "epoch": 13.06, "learning_rate": 1.736920922756213e-05, "loss": 3.0059, "step": 2079500 }, { "epoch": 13.06, "learning_rate": 1.7361360525423516e-05, "loss": 3.0052, "step": 2080000 }, { "epoch": 13.06, "learning_rate": 1.73535118232849e-05, "loss": 3.0099, "step": 2080500 }, { "epoch": 13.07, "learning_rate": 1.734566312114629e-05, "loss": 3.0054, "step": 2081000 }, { "epoch": 13.07, "learning_rate": 1.7337814419007673e-05, "loss": 3.0029, "step": 2081500 }, { "epoch": 13.07, "learning_rate": 1.7329965716869058e-05, "loss": 3.0034, "step": 2082000 }, { "epoch": 13.08, "learning_rate": 1.7322117014730446e-05, "loss": 3.0131, "step": 2082500 }, { "epoch": 13.08, "learning_rate": 1.731426831259183e-05, "loss": 3.005, "step": 2083000 }, { "epoch": 13.08, "learning_rate": 1.7306435307857492e-05, "loss": 3.0087, "step": 2083500 }, { "epoch": 13.09, "learning_rate": 1.7298586605718877e-05, "loss": 3.0096, "step": 2084000 }, { "epoch": 13.09, "learning_rate": 1.7290737903580265e-05, "loss": 3.0086, "step": 2084500 }, { "epoch": 13.09, "learning_rate": 1.728288920144165e-05, "loss": 3.0069, "step": 2085000 }, { "epoch": 13.09, "learning_rate": 1.7275040499303035e-05, "loss": 3.0031, "step": 2085500 }, { "epoch": 13.1, "learning_rate": 1.7267191797164423e-05, "loss": 3.0036, "step": 2086000 }, { "epoch": 13.1, "learning_rate": 1.7259343095025808e-05, "loss": 3.0083, "step": 2086500 }, { "epoch": 13.1, "learning_rate": 1.7251494392887192e-05, "loss": 3.0075, "step": 2087000 }, { "epoch": 13.11, "learning_rate": 1.7243677085557134e-05, "loss": 3.0138, "step": 2087500 }, { "epoch": 13.11, "learning_rate": 1.723582838341852e-05, "loss": 3.0051, "step": 2088000 }, { "epoch": 13.11, "learning_rate": 1.7227979681279903e-05, "loss": 3.008, "step": 2088500 }, { "epoch": 13.12, "learning_rate": 1.722013097914129e-05, "loss": 3.0112, "step": 2089000 }, { "epoch": 13.12, "learning_rate": 1.7212282277002676e-05, "loss": 3.0101, "step": 2089500 }, { "epoch": 13.12, "learning_rate": 1.720443357486406e-05, "loss": 3.0118, "step": 2090000 }, { "epoch": 13.13, "learning_rate": 1.7196584872725446e-05, "loss": 3.001, "step": 2090500 }, { "epoch": 13.13, "learning_rate": 1.718875186799111e-05, "loss": 3.0049, "step": 2091000 }, { "epoch": 13.13, "learning_rate": 1.7180903165852495e-05, "loss": 3.0049, "step": 2091500 }, { "epoch": 13.14, "learning_rate": 1.717305446371388e-05, "loss": 3.0115, "step": 2092000 }, { "epoch": 13.14, "learning_rate": 1.7165205761575268e-05, "loss": 3.0081, "step": 2092500 }, { "epoch": 13.14, "learning_rate": 1.7157357059436653e-05, "loss": 3.0055, "step": 2093000 }, { "epoch": 13.15, "learning_rate": 1.7149524054702314e-05, "loss": 3.0059, "step": 2093500 }, { "epoch": 13.15, "learning_rate": 1.7141675352563702e-05, "loss": 3.0079, "step": 2094000 }, { "epoch": 13.15, "learning_rate": 1.7133826650425087e-05, "loss": 3.009, "step": 2094500 }, { "epoch": 13.15, "learning_rate": 1.712597794828647e-05, "loss": 3.0103, "step": 2095000 }, { "epoch": 13.16, "learning_rate": 1.7118129246147856e-05, "loss": 3.0108, "step": 2095500 }, { "epoch": 13.16, "learning_rate": 1.7110280544009245e-05, "loss": 3.0095, "step": 2096000 }, { "epoch": 13.16, "learning_rate": 1.7102447539274906e-05, "loss": 3.01, "step": 2096500 }, { "epoch": 13.17, "learning_rate": 1.709459883713629e-05, "loss": 3.009, "step": 2097000 }, { "epoch": 13.17, "learning_rate": 1.708675013499768e-05, "loss": 3.0051, "step": 2097500 }, { "epoch": 13.17, "learning_rate": 1.7078901432859064e-05, "loss": 3.0124, "step": 2098000 }, { "epoch": 13.18, "learning_rate": 1.7071052730720448e-05, "loss": 3.0133, "step": 2098500 }, { "epoch": 13.18, "learning_rate": 1.7063204028581833e-05, "loss": 3.0073, "step": 2099000 }, { "epoch": 13.18, "learning_rate": 1.705535532644322e-05, "loss": 3.01, "step": 2099500 }, { "epoch": 13.19, "learning_rate": 1.7047506624304606e-05, "loss": 3.0062, "step": 2100000 }, { "epoch": 13.19, "eval_accuracy": 0.4569340322520572, "eval_loss": 2.9391539096832275, "eval_runtime": 1450.2516, "eval_samples_per_second": 57.784, "eval_steps_per_second": 5.779, "step": 2100000 }, { "epoch": 13.19, "learning_rate": 1.7039673619570267e-05, "loss": 3.0095, "step": 2100500 }, { "epoch": 13.19, "learning_rate": 1.7031840614835932e-05, "loss": 3.0147, "step": 2101000 }, { "epoch": 13.2, "learning_rate": 1.7023991912697317e-05, "loss": 3.0087, "step": 2101500 }, { "epoch": 13.2, "learning_rate": 1.70161432105587e-05, "loss": 3.0066, "step": 2102000 }, { "epoch": 13.2, "learning_rate": 1.700829450842009e-05, "loss": 3.0111, "step": 2102500 }, { "epoch": 13.2, "learning_rate": 1.7000445806281474e-05, "loss": 3.0094, "step": 2103000 }, { "epoch": 13.21, "learning_rate": 1.699259710414286e-05, "loss": 3.0138, "step": 2103500 }, { "epoch": 13.21, "learning_rate": 1.6984748402004244e-05, "loss": 3.0063, "step": 2104000 }, { "epoch": 13.21, "learning_rate": 1.6976899699865632e-05, "loss": 3.0082, "step": 2104500 }, { "epoch": 13.22, "learning_rate": 1.6969066695131293e-05, "loss": 3.0095, "step": 2105000 }, { "epoch": 13.22, "learning_rate": 1.6961217992992678e-05, "loss": 3.0085, "step": 2105500 }, { "epoch": 13.22, "learning_rate": 1.6953369290854066e-05, "loss": 3.0047, "step": 2106000 }, { "epoch": 13.23, "learning_rate": 1.694552058871545e-05, "loss": 3.0113, "step": 2106500 }, { "epoch": 13.23, "learning_rate": 1.6937687583981112e-05, "loss": 3.0132, "step": 2107000 }, { "epoch": 13.23, "learning_rate": 1.69298388818425e-05, "loss": 3.0141, "step": 2107500 }, { "epoch": 13.24, "learning_rate": 1.6922005877108162e-05, "loss": 3.0075, "step": 2108000 }, { "epoch": 13.24, "learning_rate": 1.6914157174969547e-05, "loss": 3.0108, "step": 2108500 }, { "epoch": 13.24, "learning_rate": 1.6906308472830935e-05, "loss": 3.0096, "step": 2109000 }, { "epoch": 13.25, "learning_rate": 1.689845977069232e-05, "loss": 3.022, "step": 2109500 }, { "epoch": 13.25, "learning_rate": 1.6890611068553704e-05, "loss": 3.0052, "step": 2110000 }, { "epoch": 13.25, "learning_rate": 1.688276236641509e-05, "loss": 3.0159, "step": 2110500 }, { "epoch": 13.25, "learning_rate": 1.6874913664276477e-05, "loss": 3.0076, "step": 2111000 }, { "epoch": 13.26, "learning_rate": 1.6867064962137862e-05, "loss": 3.0123, "step": 2111500 }, { "epoch": 13.26, "learning_rate": 1.6859231957403523e-05, "loss": 3.0116, "step": 2112000 }, { "epoch": 13.26, "learning_rate": 1.685138325526491e-05, "loss": 3.01, "step": 2112500 }, { "epoch": 13.27, "learning_rate": 1.6843534553126296e-05, "loss": 3.0042, "step": 2113000 }, { "epoch": 13.27, "learning_rate": 1.683568585098768e-05, "loss": 3.0113, "step": 2113500 }, { "epoch": 13.27, "learning_rate": 1.682783714884907e-05, "loss": 3.0062, "step": 2114000 }, { "epoch": 13.28, "learning_rate": 1.682000414411473e-05, "loss": 3.011, "step": 2114500 }, { "epoch": 13.28, "learning_rate": 1.6812155441976115e-05, "loss": 3.0133, "step": 2115000 }, { "epoch": 13.28, "learning_rate": 1.6804306739837503e-05, "loss": 3.0074, "step": 2115500 }, { "epoch": 13.29, "learning_rate": 1.6796458037698888e-05, "loss": 3.0037, "step": 2116000 }, { "epoch": 13.29, "learning_rate": 1.6788609335560273e-05, "loss": 3.0147, "step": 2116500 }, { "epoch": 13.29, "learning_rate": 1.6780760633421657e-05, "loss": 3.0101, "step": 2117000 }, { "epoch": 13.3, "learning_rate": 1.6772927628687322e-05, "loss": 3.014, "step": 2117500 }, { "epoch": 13.3, "learning_rate": 1.6765078926548707e-05, "loss": 3.0114, "step": 2118000 }, { "epoch": 13.3, "learning_rate": 1.675723022441009e-05, "loss": 3.0114, "step": 2118500 }, { "epoch": 13.31, "learning_rate": 1.674938152227148e-05, "loss": 3.0104, "step": 2119000 }, { "epoch": 13.31, "learning_rate": 1.6741532820132865e-05, "loss": 3.012, "step": 2119500 }, { "epoch": 13.31, "learning_rate": 1.673368411799425e-05, "loss": 3.0155, "step": 2120000 }, { "epoch": 13.31, "learning_rate": 1.6725851113259914e-05, "loss": 3.0075, "step": 2120500 }, { "epoch": 13.32, "learning_rate": 1.67180024111213e-05, "loss": 3.0087, "step": 2121000 }, { "epoch": 13.32, "learning_rate": 1.6710153708982683e-05, "loss": 3.0145, "step": 2121500 }, { "epoch": 13.32, "learning_rate": 1.6702305006844068e-05, "loss": 3.0162, "step": 2122000 }, { "epoch": 13.33, "learning_rate": 1.6694472002109733e-05, "loss": 3.0121, "step": 2122500 }, { "epoch": 13.33, "learning_rate": 1.6686623299971118e-05, "loss": 3.0071, "step": 2123000 }, { "epoch": 13.33, "learning_rate": 1.6678774597832502e-05, "loss": 3.0151, "step": 2123500 }, { "epoch": 13.34, "learning_rate": 1.667092589569389e-05, "loss": 3.0131, "step": 2124000 }, { "epoch": 13.34, "learning_rate": 1.6663077193555275e-05, "loss": 3.0119, "step": 2124500 }, { "epoch": 13.34, "learning_rate": 1.665522849141666e-05, "loss": 3.0143, "step": 2125000 }, { "epoch": 13.35, "learning_rate": 1.6647379789278045e-05, "loss": 3.0127, "step": 2125500 }, { "epoch": 13.35, "learning_rate": 1.6639531087139433e-05, "loss": 3.0056, "step": 2126000 }, { "epoch": 13.35, "learning_rate": 1.6631698082405094e-05, "loss": 3.0184, "step": 2126500 }, { "epoch": 13.36, "learning_rate": 1.662384938026648e-05, "loss": 3.0063, "step": 2127000 }, { "epoch": 13.36, "learning_rate": 1.6616016375532144e-05, "loss": 3.013, "step": 2127500 }, { "epoch": 13.36, "learning_rate": 1.660816767339353e-05, "loss": 3.0135, "step": 2128000 }, { "epoch": 13.36, "learning_rate": 1.6600318971254913e-05, "loss": 3.0179, "step": 2128500 }, { "epoch": 13.37, "learning_rate": 1.65924702691163e-05, "loss": 3.0135, "step": 2129000 }, { "epoch": 13.37, "learning_rate": 1.6584621566977683e-05, "loss": 3.0122, "step": 2129500 }, { "epoch": 13.37, "learning_rate": 1.6576788562243348e-05, "loss": 3.0075, "step": 2130000 }, { "epoch": 13.37, "eval_accuracy": 0.4570965337832005, "eval_loss": 2.93697190284729, "eval_runtime": 1450.9539, "eval_samples_per_second": 57.756, "eval_steps_per_second": 5.776, "step": 2130000 }, { "epoch": 13.38, "learning_rate": 1.6568939860104736e-05, "loss": 3.0166, "step": 2130500 }, { "epoch": 13.38, "learning_rate": 1.656109115796612e-05, "loss": 3.0104, "step": 2131000 }, { "epoch": 13.38, "learning_rate": 1.6553242455827505e-05, "loss": 3.0109, "step": 2131500 }, { "epoch": 13.39, "learning_rate": 1.654539375368889e-05, "loss": 3.013, "step": 2132000 }, { "epoch": 13.39, "learning_rate": 1.6537545051550278e-05, "loss": 3.0103, "step": 2132500 }, { "epoch": 13.39, "learning_rate": 1.652969634941166e-05, "loss": 3.0156, "step": 2133000 }, { "epoch": 13.4, "learning_rate": 1.6521863344677324e-05, "loss": 3.0109, "step": 2133500 }, { "epoch": 13.4, "learning_rate": 1.6514014642538712e-05, "loss": 3.0153, "step": 2134000 }, { "epoch": 13.4, "learning_rate": 1.6506181637804374e-05, "loss": 3.0164, "step": 2134500 }, { "epoch": 13.41, "learning_rate": 1.649833293566576e-05, "loss": 3.003, "step": 2135000 }, { "epoch": 13.41, "learning_rate": 1.6490484233527147e-05, "loss": 3.0173, "step": 2135500 }, { "epoch": 13.41, "learning_rate": 1.6482635531388528e-05, "loss": 3.0162, "step": 2136000 }, { "epoch": 13.42, "learning_rate": 1.6474786829249916e-05, "loss": 3.018, "step": 2136500 }, { "epoch": 13.42, "learning_rate": 1.64669381271113e-05, "loss": 3.0195, "step": 2137000 }, { "epoch": 13.42, "learning_rate": 1.645908942497269e-05, "loss": 3.0109, "step": 2137500 }, { "epoch": 13.42, "learning_rate": 1.6451240722834074e-05, "loss": 3.0097, "step": 2138000 }, { "epoch": 13.43, "learning_rate": 1.644339202069546e-05, "loss": 3.0155, "step": 2138500 }, { "epoch": 13.43, "learning_rate": 1.6435543318556846e-05, "loss": 3.0099, "step": 2139000 }, { "epoch": 13.43, "learning_rate": 1.6427694616418228e-05, "loss": 3.0131, "step": 2139500 }, { "epoch": 13.44, "learning_rate": 1.6419861611683893e-05, "loss": 3.0089, "step": 2140000 }, { "epoch": 13.44, "learning_rate": 1.6412028606949557e-05, "loss": 3.0031, "step": 2140500 }, { "epoch": 13.44, "learning_rate": 1.640417990481094e-05, "loss": 3.0117, "step": 2141000 }, { "epoch": 13.45, "learning_rate": 1.6396331202672327e-05, "loss": 3.019, "step": 2141500 }, { "epoch": 13.45, "learning_rate": 1.638848250053371e-05, "loss": 3.0076, "step": 2142000 }, { "epoch": 13.45, "learning_rate": 1.63806337983951e-05, "loss": 3.0133, "step": 2142500 }, { "epoch": 13.46, "learning_rate": 1.6372785096256484e-05, "loss": 3.0128, "step": 2143000 }, { "epoch": 13.46, "learning_rate": 1.6364952091522146e-05, "loss": 3.011, "step": 2143500 }, { "epoch": 13.46, "learning_rate": 1.6357103389383534e-05, "loss": 3.0152, "step": 2144000 }, { "epoch": 13.47, "learning_rate": 1.634925468724492e-05, "loss": 3.0182, "step": 2144500 }, { "epoch": 13.47, "learning_rate": 1.6341405985106303e-05, "loss": 3.0137, "step": 2145000 }, { "epoch": 13.47, "learning_rate": 1.633355728296769e-05, "loss": 3.0123, "step": 2145500 }, { "epoch": 13.47, "learning_rate": 1.6325708580829073e-05, "loss": 3.0118, "step": 2146000 }, { "epoch": 13.48, "learning_rate": 1.631785987869046e-05, "loss": 3.0102, "step": 2146500 }, { "epoch": 13.48, "learning_rate": 1.6310011176551846e-05, "loss": 3.0121, "step": 2147000 }, { "epoch": 13.48, "learning_rate": 1.6302162474413234e-05, "loss": 3.013, "step": 2147500 }, { "epoch": 13.49, "learning_rate": 1.6294313772274615e-05, "loss": 3.0129, "step": 2148000 }, { "epoch": 13.49, "learning_rate": 1.6286465070136003e-05, "loss": 3.0117, "step": 2148500 }, { "epoch": 13.49, "learning_rate": 1.6278632065401668e-05, "loss": 3.0102, "step": 2149000 }, { "epoch": 13.5, "learning_rate": 1.627078336326305e-05, "loss": 3.0137, "step": 2149500 }, { "epoch": 13.5, "learning_rate": 1.6262934661124438e-05, "loss": 3.0069, "step": 2150000 }, { "epoch": 13.5, "learning_rate": 1.6255101656390102e-05, "loss": 3.0072, "step": 2150500 }, { "epoch": 13.51, "learning_rate": 1.6247252954251484e-05, "loss": 3.01, "step": 2151000 }, { "epoch": 13.51, "learning_rate": 1.6239404252112872e-05, "loss": 3.0117, "step": 2151500 }, { "epoch": 13.51, "learning_rate": 1.6231571247378537e-05, "loss": 3.018, "step": 2152000 }, { "epoch": 13.52, "learning_rate": 1.6223722545239918e-05, "loss": 3.0078, "step": 2152500 }, { "epoch": 13.52, "learning_rate": 1.6215873843101306e-05, "loss": 3.017, "step": 2153000 }, { "epoch": 13.52, "learning_rate": 1.620802514096269e-05, "loss": 3.0107, "step": 2153500 }, { "epoch": 13.52, "learning_rate": 1.620017643882408e-05, "loss": 3.0137, "step": 2154000 }, { "epoch": 13.53, "learning_rate": 1.619232773668546e-05, "loss": 3.0119, "step": 2154500 }, { "epoch": 13.53, "learning_rate": 1.618447903454685e-05, "loss": 3.0196, "step": 2155000 }, { "epoch": 13.53, "learning_rate": 1.6176630332408233e-05, "loss": 3.0099, "step": 2155500 }, { "epoch": 13.54, "learning_rate": 1.6168781630269618e-05, "loss": 3.0135, "step": 2156000 }, { "epoch": 13.54, "learning_rate": 1.6160932928131006e-05, "loss": 3.0143, "step": 2156500 }, { "epoch": 13.54, "learning_rate": 1.615308422599239e-05, "loss": 3.0101, "step": 2157000 }, { "epoch": 13.55, "learning_rate": 1.6145235523853776e-05, "loss": 3.0055, "step": 2157500 }, { "epoch": 13.55, "learning_rate": 1.613738682171516e-05, "loss": 3.0115, "step": 2158000 }, { "epoch": 13.55, "learning_rate": 1.612953811957655e-05, "loss": 3.0049, "step": 2158500 }, { "epoch": 13.56, "learning_rate": 1.6121705114842213e-05, "loss": 3.0144, "step": 2159000 }, { "epoch": 13.56, "learning_rate": 1.6113856412703595e-05, "loss": 3.0037, "step": 2159500 }, { "epoch": 13.56, "learning_rate": 1.6106007710564983e-05, "loss": 3.0093, "step": 2160000 }, { "epoch": 13.56, "eval_accuracy": 0.45744130858443527, "eval_loss": 2.9350316524505615, "eval_runtime": 1443.7257, "eval_samples_per_second": 58.045, "eval_steps_per_second": 5.805, "step": 2160000 }, { "epoch": 13.57, "learning_rate": 1.6098174705830647e-05, "loss": 3.0172, "step": 2160500 }, { "epoch": 13.57, "learning_rate": 1.609032600369203e-05, "loss": 3.0119, "step": 2161000 }, { "epoch": 13.57, "learning_rate": 1.6082477301553417e-05, "loss": 3.0158, "step": 2161500 }, { "epoch": 13.58, "learning_rate": 1.60746285994148e-05, "loss": 3.0097, "step": 2162000 }, { "epoch": 13.58, "learning_rate": 1.6066795594680463e-05, "loss": 3.0144, "step": 2162500 }, { "epoch": 13.58, "learning_rate": 1.605894689254185e-05, "loss": 3.0057, "step": 2163000 }, { "epoch": 13.58, "learning_rate": 1.6051113887807513e-05, "loss": 3.0122, "step": 2163500 }, { "epoch": 13.59, "learning_rate": 1.6043265185668897e-05, "loss": 3.0156, "step": 2164000 }, { "epoch": 13.59, "learning_rate": 1.6035416483530285e-05, "loss": 3.0096, "step": 2164500 }, { "epoch": 13.59, "learning_rate": 1.602756778139167e-05, "loss": 3.0083, "step": 2165000 }, { "epoch": 13.6, "learning_rate": 1.6019719079253058e-05, "loss": 3.0104, "step": 2165500 }, { "epoch": 13.6, "learning_rate": 1.601187037711444e-05, "loss": 3.013, "step": 2166000 }, { "epoch": 13.6, "learning_rate": 1.6004021674975828e-05, "loss": 3.0152, "step": 2166500 }, { "epoch": 13.61, "learning_rate": 1.5996172972837212e-05, "loss": 3.009, "step": 2167000 }, { "epoch": 13.61, "learning_rate": 1.5988324270698597e-05, "loss": 3.0109, "step": 2167500 }, { "epoch": 13.61, "learning_rate": 1.5980475568559982e-05, "loss": 3.014, "step": 2168000 }, { "epoch": 13.62, "learning_rate": 1.597262686642137e-05, "loss": 3.0056, "step": 2168500 }, { "epoch": 13.62, "learning_rate": 1.5964778164282755e-05, "loss": 3.015, "step": 2169000 }, { "epoch": 13.62, "learning_rate": 1.595692946214414e-05, "loss": 3.0087, "step": 2169500 }, { "epoch": 13.63, "learning_rate": 1.594911215481408e-05, "loss": 3.0177, "step": 2170000 }, { "epoch": 13.63, "learning_rate": 1.5941263452675466e-05, "loss": 3.013, "step": 2170500 }, { "epoch": 13.63, "learning_rate": 1.593341475053685e-05, "loss": 3.0104, "step": 2171000 }, { "epoch": 13.63, "learning_rate": 1.592556604839824e-05, "loss": 3.0096, "step": 2171500 }, { "epoch": 13.64, "learning_rate": 1.5917717346259623e-05, "loss": 3.0073, "step": 2172000 }, { "epoch": 13.64, "learning_rate": 1.5909868644121008e-05, "loss": 3.0132, "step": 2172500 }, { "epoch": 13.64, "learning_rate": 1.5902019941982393e-05, "loss": 3.0079, "step": 2173000 }, { "epoch": 13.65, "learning_rate": 1.589417123984378e-05, "loss": 3.0075, "step": 2173500 }, { "epoch": 13.65, "learning_rate": 1.5886322537705166e-05, "loss": 3.0114, "step": 2174000 }, { "epoch": 13.65, "learning_rate": 1.587847383556655e-05, "loss": 3.0139, "step": 2174500 }, { "epoch": 13.66, "learning_rate": 1.587062513342794e-05, "loss": 3.0044, "step": 2175000 }, { "epoch": 13.66, "learning_rate": 1.5862776431289323e-05, "loss": 3.0153, "step": 2175500 }, { "epoch": 13.66, "learning_rate": 1.5854927729150708e-05, "loss": 3.0117, "step": 2176000 }, { "epoch": 13.67, "learning_rate": 1.5847079027012093e-05, "loss": 3.0132, "step": 2176500 }, { "epoch": 13.67, "learning_rate": 1.583923032487348e-05, "loss": 3.005, "step": 2177000 }, { "epoch": 13.67, "learning_rate": 1.5831381622734866e-05, "loss": 3.0139, "step": 2177500 }, { "epoch": 13.68, "learning_rate": 1.5823548618000527e-05, "loss": 3.0105, "step": 2178000 }, { "epoch": 13.68, "learning_rate": 1.5815699915861915e-05, "loss": 3.0075, "step": 2178500 }, { "epoch": 13.68, "learning_rate": 1.58078512137233e-05, "loss": 3.0036, "step": 2179000 }, { "epoch": 13.68, "learning_rate": 1.580001820898896e-05, "loss": 3.0086, "step": 2179500 }, { "epoch": 13.69, "learning_rate": 1.5792185204254626e-05, "loss": 3.0204, "step": 2180000 }, { "epoch": 13.69, "learning_rate": 1.578433650211601e-05, "loss": 3.0115, "step": 2180500 }, { "epoch": 13.69, "learning_rate": 1.5776487799977396e-05, "loss": 3.0059, "step": 2181000 }, { "epoch": 13.7, "learning_rate": 1.5768639097838784e-05, "loss": 3.0149, "step": 2181500 }, { "epoch": 13.7, "learning_rate": 1.5760806093104445e-05, "loss": 3.0156, "step": 2182000 }, { "epoch": 13.7, "learning_rate": 1.575295739096583e-05, "loss": 3.0078, "step": 2182500 }, { "epoch": 13.71, "learning_rate": 1.5745108688827218e-05, "loss": 3.0114, "step": 2183000 }, { "epoch": 13.71, "learning_rate": 1.5737259986688603e-05, "loss": 3.017, "step": 2183500 }, { "epoch": 13.71, "learning_rate": 1.5729411284549987e-05, "loss": 3.0178, "step": 2184000 }, { "epoch": 13.72, "learning_rate": 1.5721562582411372e-05, "loss": 3.0094, "step": 2184500 }, { "epoch": 13.72, "learning_rate": 1.571371388027276e-05, "loss": 3.0137, "step": 2185000 }, { "epoch": 13.72, "learning_rate": 1.570588087553842e-05, "loss": 3.0108, "step": 2185500 }, { "epoch": 13.73, "learning_rate": 1.5698032173399806e-05, "loss": 3.0132, "step": 2186000 }, { "epoch": 13.73, "learning_rate": 1.5690183471261194e-05, "loss": 3.0216, "step": 2186500 }, { "epoch": 13.73, "learning_rate": 1.568233476912258e-05, "loss": 3.0085, "step": 2187000 }, { "epoch": 13.74, "learning_rate": 1.5674486066983964e-05, "loss": 3.0124, "step": 2187500 }, { "epoch": 13.74, "learning_rate": 1.566663736484535e-05, "loss": 3.0156, "step": 2188000 }, { "epoch": 13.74, "learning_rate": 1.5658788662706737e-05, "loss": 3.0086, "step": 2188500 }, { "epoch": 13.74, "learning_rate": 1.5650955657972398e-05, "loss": 3.0085, "step": 2189000 }, { "epoch": 13.75, "learning_rate": 1.5643106955833783e-05, "loss": 3.0121, "step": 2189500 }, { "epoch": 13.75, "learning_rate": 1.563525825369517e-05, "loss": 3.0095, "step": 2190000 }, { "epoch": 13.75, "eval_accuracy": 0.45760267863553256, "eval_loss": 2.9335360527038574, "eval_runtime": 1442.9946, "eval_samples_per_second": 58.074, "eval_steps_per_second": 5.808, "step": 2190000 }, { "epoch": 13.75, "learning_rate": 1.5627409551556556e-05, "loss": 3.012, "step": 2190500 }, { "epoch": 13.76, "learning_rate": 1.561956084941794e-05, "loss": 3.0087, "step": 2191000 }, { "epoch": 13.76, "learning_rate": 1.5611712147279325e-05, "loss": 3.0196, "step": 2191500 }, { "epoch": 13.76, "learning_rate": 1.5603863445140713e-05, "loss": 3.0159, "step": 2192000 }, { "epoch": 13.77, "learning_rate": 1.5596014743002098e-05, "loss": 3.0142, "step": 2192500 }, { "epoch": 13.77, "learning_rate": 1.5588166040863483e-05, "loss": 3.0141, "step": 2193000 }, { "epoch": 13.77, "learning_rate": 1.558031733872487e-05, "loss": 3.0071, "step": 2193500 }, { "epoch": 13.78, "learning_rate": 1.5572468636586256e-05, "loss": 3.0154, "step": 2194000 }, { "epoch": 13.78, "learning_rate": 1.556461993444764e-05, "loss": 3.0142, "step": 2194500 }, { "epoch": 13.78, "learning_rate": 1.5556771232309025e-05, "loss": 3.0149, "step": 2195000 }, { "epoch": 13.79, "learning_rate": 1.554893822757469e-05, "loss": 3.0009, "step": 2195500 }, { "epoch": 13.79, "learning_rate": 1.5541089525436075e-05, "loss": 3.0165, "step": 2196000 }, { "epoch": 13.79, "learning_rate": 1.553324082329746e-05, "loss": 3.0154, "step": 2196500 }, { "epoch": 13.79, "learning_rate": 1.5525392121158848e-05, "loss": 3.0138, "step": 2197000 }, { "epoch": 13.8, "learning_rate": 1.551754341902023e-05, "loss": 3.0126, "step": 2197500 }, { "epoch": 13.8, "learning_rate": 1.5509694716881617e-05, "loss": 3.0037, "step": 2198000 }, { "epoch": 13.8, "learning_rate": 1.5501861712147282e-05, "loss": 3.017, "step": 2198500 }, { "epoch": 13.81, "learning_rate": 1.5494013010008667e-05, "loss": 3.0143, "step": 2199000 }, { "epoch": 13.81, "learning_rate": 1.548616430787005e-05, "loss": 3.0129, "step": 2199500 }, { "epoch": 13.81, "learning_rate": 1.5478315605731436e-05, "loss": 3.0102, "step": 2200000 }, { "epoch": 13.82, "learning_rate": 1.5470466903592824e-05, "loss": 3.0221, "step": 2200500 }, { "epoch": 13.82, "learning_rate": 1.5462618201454206e-05, "loss": 3.0079, "step": 2201000 }, { "epoch": 13.82, "learning_rate": 1.5454769499315594e-05, "loss": 3.0097, "step": 2201500 }, { "epoch": 13.83, "learning_rate": 1.544693649458126e-05, "loss": 3.0066, "step": 2202000 }, { "epoch": 13.83, "learning_rate": 1.543908779244264e-05, "loss": 3.0134, "step": 2202500 }, { "epoch": 13.83, "learning_rate": 1.5431239090304028e-05, "loss": 3.0107, "step": 2203000 }, { "epoch": 13.84, "learning_rate": 1.5423390388165413e-05, "loss": 3.0187, "step": 2203500 }, { "epoch": 13.84, "learning_rate": 1.54155416860268e-05, "loss": 3.0081, "step": 2204000 }, { "epoch": 13.84, "learning_rate": 1.5407692983888186e-05, "loss": 3.0008, "step": 2204500 }, { "epoch": 13.85, "learning_rate": 1.539984428174957e-05, "loss": 3.006, "step": 2205000 }, { "epoch": 13.85, "learning_rate": 1.539199557961096e-05, "loss": 3.0104, "step": 2205500 }, { "epoch": 13.85, "learning_rate": 1.538416257487662e-05, "loss": 3.0112, "step": 2206000 }, { "epoch": 13.85, "learning_rate": 1.5376313872738005e-05, "loss": 3.0141, "step": 2206500 }, { "epoch": 13.86, "learning_rate": 1.5368465170599393e-05, "loss": 3.0111, "step": 2207000 }, { "epoch": 13.86, "learning_rate": 1.5360616468460774e-05, "loss": 3.0081, "step": 2207500 }, { "epoch": 13.86, "learning_rate": 1.5352767766322162e-05, "loss": 3.0104, "step": 2208000 }, { "epoch": 13.87, "learning_rate": 1.5344919064183547e-05, "loss": 3.0074, "step": 2208500 }, { "epoch": 13.87, "learning_rate": 1.5337086059449208e-05, "loss": 3.0109, "step": 2209000 }, { "epoch": 13.87, "learning_rate": 1.5329237357310596e-05, "loss": 3.0135, "step": 2209500 }, { "epoch": 13.88, "learning_rate": 1.532138865517198e-05, "loss": 3.0114, "step": 2210000 }, { "epoch": 13.88, "learning_rate": 1.531353995303337e-05, "loss": 3.0078, "step": 2210500 }, { "epoch": 13.88, "learning_rate": 1.530569125089475e-05, "loss": 3.0175, "step": 2211000 }, { "epoch": 13.89, "learning_rate": 1.529784254875614e-05, "loss": 3.0112, "step": 2211500 }, { "epoch": 13.89, "learning_rate": 1.5289993846617523e-05, "loss": 3.0123, "step": 2212000 }, { "epoch": 13.89, "learning_rate": 1.5282160841883185e-05, "loss": 3.0081, "step": 2212500 }, { "epoch": 13.9, "learning_rate": 1.5274312139744573e-05, "loss": 3.0108, "step": 2213000 }, { "epoch": 13.9, "learning_rate": 1.5266463437605958e-05, "loss": 3.0071, "step": 2213500 }, { "epoch": 13.9, "learning_rate": 1.5258614735467344e-05, "loss": 3.0078, "step": 2214000 }, { "epoch": 13.9, "learning_rate": 1.5250766033328729e-05, "loss": 3.0095, "step": 2214500 }, { "epoch": 13.91, "learning_rate": 1.5242917331190115e-05, "loss": 3.0137, "step": 2215000 }, { "epoch": 13.91, "learning_rate": 1.5235084326455778e-05, "loss": 3.0161, "step": 2215500 }, { "epoch": 13.91, "learning_rate": 1.5227235624317163e-05, "loss": 3.0136, "step": 2216000 }, { "epoch": 13.92, "learning_rate": 1.521938692217855e-05, "loss": 3.0099, "step": 2216500 }, { "epoch": 13.92, "learning_rate": 1.5211538220039934e-05, "loss": 3.0056, "step": 2217000 }, { "epoch": 13.92, "learning_rate": 1.520368951790132e-05, "loss": 3.0145, "step": 2217500 }, { "epoch": 13.93, "learning_rate": 1.5195856513166984e-05, "loss": 3.0101, "step": 2218000 }, { "epoch": 13.93, "learning_rate": 1.5188007811028369e-05, "loss": 3.0154, "step": 2218500 }, { "epoch": 13.93, "learning_rate": 1.5180159108889755e-05, "loss": 3.0047, "step": 2219000 }, { "epoch": 13.94, "learning_rate": 1.517231040675114e-05, "loss": 3.0203, "step": 2219500 }, { "epoch": 13.94, "learning_rate": 1.5164461704612526e-05, "loss": 3.0103, "step": 2220000 }, { "epoch": 13.94, "eval_accuracy": 0.4578338738366854, "eval_loss": 2.9320759773254395, "eval_runtime": 1447.3201, "eval_samples_per_second": 57.901, "eval_steps_per_second": 5.791, "step": 2220000 }, { "epoch": 13.94, "learning_rate": 1.515662869987819e-05, "loss": 3.0094, "step": 2220500 }, { "epoch": 13.95, "learning_rate": 1.5148779997739574e-05, "loss": 3.0132, "step": 2221000 }, { "epoch": 13.95, "learning_rate": 1.514093129560096e-05, "loss": 3.0106, "step": 2221500 }, { "epoch": 13.95, "learning_rate": 1.5133082593462347e-05, "loss": 3.0123, "step": 2222000 }, { "epoch": 13.95, "learning_rate": 1.5125233891323732e-05, "loss": 3.0008, "step": 2222500 }, { "epoch": 13.96, "learning_rate": 1.5117385189185118e-05, "loss": 3.0048, "step": 2223000 }, { "epoch": 13.96, "learning_rate": 1.510955218445078e-05, "loss": 3.0094, "step": 2223500 }, { "epoch": 13.96, "learning_rate": 1.5101719179716442e-05, "loss": 3.0128, "step": 2224000 }, { "epoch": 13.97, "learning_rate": 1.5093870477577829e-05, "loss": 3.0118, "step": 2224500 }, { "epoch": 13.97, "learning_rate": 1.5086021775439214e-05, "loss": 3.0169, "step": 2225000 }, { "epoch": 13.97, "learning_rate": 1.50781730733006e-05, "loss": 3.0061, "step": 2225500 }, { "epoch": 13.98, "learning_rate": 1.5070324371161985e-05, "loss": 3.0156, "step": 2226000 }, { "epoch": 13.98, "learning_rate": 1.5062475669023371e-05, "loss": 3.0138, "step": 2226500 }, { "epoch": 13.98, "learning_rate": 1.5054626966884758e-05, "loss": 3.0082, "step": 2227000 }, { "epoch": 13.99, "learning_rate": 1.5046778264746142e-05, "loss": 3.0124, "step": 2227500 }, { "epoch": 13.99, "learning_rate": 1.5038945260011806e-05, "loss": 3.0094, "step": 2228000 }, { "epoch": 13.99, "learning_rate": 1.5031096557873192e-05, "loss": 3.0127, "step": 2228500 }, { "epoch": 14.0, "learning_rate": 1.5023247855734577e-05, "loss": 3.0151, "step": 2229000 }, { "epoch": 14.0, "learning_rate": 1.5015399153595963e-05, "loss": 3.0185, "step": 2229500 }, { "epoch": 14.0, "learning_rate": 1.5007550451457348e-05, "loss": 3.0085, "step": 2230000 }, { "epoch": 14.01, "learning_rate": 1.4999717446723011e-05, "loss": 2.9966, "step": 2230500 }, { "epoch": 14.01, "learning_rate": 1.4991868744584397e-05, "loss": 2.9962, "step": 2231000 }, { "epoch": 14.01, "learning_rate": 1.4984020042445782e-05, "loss": 2.9988, "step": 2231500 }, { "epoch": 14.01, "learning_rate": 1.4976171340307169e-05, "loss": 2.9972, "step": 2232000 }, { "epoch": 14.02, "learning_rate": 1.4968322638168553e-05, "loss": 2.9926, "step": 2232500 }, { "epoch": 14.02, "learning_rate": 1.4960489633434216e-05, "loss": 2.99, "step": 2233000 }, { "epoch": 14.02, "learning_rate": 1.4952640931295603e-05, "loss": 2.9976, "step": 2233500 }, { "epoch": 14.03, "learning_rate": 1.4944792229156988e-05, "loss": 2.9974, "step": 2234000 }, { "epoch": 14.03, "learning_rate": 1.4936943527018374e-05, "loss": 2.9949, "step": 2234500 }, { "epoch": 14.03, "learning_rate": 1.4929110522284037e-05, "loss": 2.9966, "step": 2235000 }, { "epoch": 14.04, "learning_rate": 1.4921261820145422e-05, "loss": 2.9966, "step": 2235500 }, { "epoch": 14.04, "learning_rate": 1.4913413118006808e-05, "loss": 2.9911, "step": 2236000 }, { "epoch": 14.04, "learning_rate": 1.4905564415868193e-05, "loss": 2.996, "step": 2236500 }, { "epoch": 14.05, "learning_rate": 1.489771571372958e-05, "loss": 2.9952, "step": 2237000 }, { "epoch": 14.05, "learning_rate": 1.4889867011590964e-05, "loss": 2.996, "step": 2237500 }, { "epoch": 14.05, "learning_rate": 1.488201830945235e-05, "loss": 2.9964, "step": 2238000 }, { "epoch": 14.06, "learning_rate": 1.4874185304718014e-05, "loss": 2.9944, "step": 2238500 }, { "epoch": 14.06, "learning_rate": 1.4866336602579398e-05, "loss": 2.9969, "step": 2239000 }, { "epoch": 14.06, "learning_rate": 1.4858487900440785e-05, "loss": 2.9987, "step": 2239500 }, { "epoch": 14.06, "learning_rate": 1.4850639198302168e-05, "loss": 2.9958, "step": 2240000 }, { "epoch": 14.07, "learning_rate": 1.4842806193567833e-05, "loss": 3.0007, "step": 2240500 }, { "epoch": 14.07, "learning_rate": 1.4834957491429219e-05, "loss": 3.0014, "step": 2241000 }, { "epoch": 14.07, "learning_rate": 1.4827108789290604e-05, "loss": 2.9978, "step": 2241500 }, { "epoch": 14.08, "learning_rate": 1.481926008715199e-05, "loss": 2.9986, "step": 2242000 }, { "epoch": 14.08, "learning_rate": 1.4811411385013373e-05, "loss": 2.9873, "step": 2242500 }, { "epoch": 14.08, "learning_rate": 1.4803562682874761e-05, "loss": 2.9971, "step": 2243000 }, { "epoch": 14.09, "learning_rate": 1.4795729678140424e-05, "loss": 2.9988, "step": 2243500 }, { "epoch": 14.09, "learning_rate": 1.4787880976001807e-05, "loss": 3.0015, "step": 2244000 }, { "epoch": 14.09, "learning_rate": 1.4780047971267472e-05, "loss": 2.9965, "step": 2244500 }, { "epoch": 14.1, "learning_rate": 1.4772199269128859e-05, "loss": 3.0035, "step": 2245000 }, { "epoch": 14.1, "learning_rate": 1.4764350566990243e-05, "loss": 3.0022, "step": 2245500 }, { "epoch": 14.1, "learning_rate": 1.475650186485163e-05, "loss": 2.9956, "step": 2246000 }, { "epoch": 14.11, "learning_rate": 1.4748653162713013e-05, "loss": 3.0002, "step": 2246500 }, { "epoch": 14.11, "learning_rate": 1.4740804460574401e-05, "loss": 3.0045, "step": 2247000 }, { "epoch": 14.11, "learning_rate": 1.4732955758435784e-05, "loss": 3.0019, "step": 2247500 }, { "epoch": 14.12, "learning_rate": 1.4725107056297172e-05, "loss": 2.9997, "step": 2248000 }, { "epoch": 14.12, "learning_rate": 1.4717274051562835e-05, "loss": 3.0062, "step": 2248500 }, { "epoch": 14.12, "learning_rate": 1.4709425349424218e-05, "loss": 3.0066, "step": 2249000 }, { "epoch": 14.12, "learning_rate": 1.4701576647285606e-05, "loss": 3.0005, "step": 2249500 }, { "epoch": 14.13, "learning_rate": 1.469372794514699e-05, "loss": 3.0005, "step": 2250000 }, { "epoch": 14.13, "eval_accuracy": 0.4580670753735899, "eval_loss": 2.931225538253784, "eval_runtime": 1450.4788, "eval_samples_per_second": 57.775, "eval_steps_per_second": 5.778, "step": 2250000 }, { "epoch": 14.13, "learning_rate": 1.4685879243008378e-05, "loss": 2.9983, "step": 2250500 }, { "epoch": 14.13, "learning_rate": 1.4678030540869764e-05, "loss": 3.0042, "step": 2251000 }, { "epoch": 14.14, "learning_rate": 1.4670197536135424e-05, "loss": 3.0041, "step": 2251500 }, { "epoch": 14.14, "learning_rate": 1.4662348833996812e-05, "loss": 3.0002, "step": 2252000 }, { "epoch": 14.14, "learning_rate": 1.4654500131858195e-05, "loss": 3.0047, "step": 2252500 }, { "epoch": 14.15, "learning_rate": 1.4646651429719583e-05, "loss": 3.0011, "step": 2253000 }, { "epoch": 14.15, "learning_rate": 1.463880272758097e-05, "loss": 2.9912, "step": 2253500 }, { "epoch": 14.15, "learning_rate": 1.4630954025442353e-05, "loss": 2.9986, "step": 2254000 }, { "epoch": 14.16, "learning_rate": 1.462310532330374e-05, "loss": 2.9996, "step": 2254500 }, { "epoch": 14.16, "learning_rate": 1.4615256621165124e-05, "loss": 3.0, "step": 2255000 }, { "epoch": 14.16, "learning_rate": 1.4607407919026512e-05, "loss": 2.9971, "step": 2255500 }, { "epoch": 14.17, "learning_rate": 1.4599559216887895e-05, "loss": 2.9986, "step": 2256000 }, { "epoch": 14.17, "learning_rate": 1.4591726212153558e-05, "loss": 2.9945, "step": 2256500 }, { "epoch": 14.17, "learning_rate": 1.4583877510014946e-05, "loss": 3.0018, "step": 2257000 }, { "epoch": 14.17, "learning_rate": 1.4576028807876329e-05, "loss": 2.9944, "step": 2257500 }, { "epoch": 14.18, "learning_rate": 1.4568195803141992e-05, "loss": 2.994, "step": 2258000 }, { "epoch": 14.18, "learning_rate": 1.456034710100338e-05, "loss": 3.006, "step": 2258500 }, { "epoch": 14.18, "learning_rate": 1.4552498398864763e-05, "loss": 2.9976, "step": 2259000 }, { "epoch": 14.19, "learning_rate": 1.4544649696726152e-05, "loss": 3.0001, "step": 2259500 }, { "epoch": 14.19, "learning_rate": 1.4536800994587535e-05, "loss": 2.997, "step": 2260000 }, { "epoch": 14.19, "learning_rate": 1.4528952292448923e-05, "loss": 3.0072, "step": 2260500 }, { "epoch": 14.2, "learning_rate": 1.4521103590310306e-05, "loss": 3.0046, "step": 2261000 }, { "epoch": 14.2, "learning_rate": 1.4513286282980245e-05, "loss": 3.0045, "step": 2261500 }, { "epoch": 14.2, "learning_rate": 1.4505437580841632e-05, "loss": 3.0016, "step": 2262000 }, { "epoch": 14.21, "learning_rate": 1.449758887870302e-05, "loss": 2.9959, "step": 2262500 }, { "epoch": 14.21, "learning_rate": 1.4489740176564403e-05, "loss": 2.9987, "step": 2263000 }, { "epoch": 14.21, "learning_rate": 1.4481891474425791e-05, "loss": 3.0004, "step": 2263500 }, { "epoch": 14.22, "learning_rate": 1.4474042772287174e-05, "loss": 3.002, "step": 2264000 }, { "epoch": 14.22, "learning_rate": 1.4466194070148562e-05, "loss": 3.0031, "step": 2264500 }, { "epoch": 14.22, "learning_rate": 1.4458345368009945e-05, "loss": 2.9991, "step": 2265000 }, { "epoch": 14.22, "learning_rate": 1.4450496665871332e-05, "loss": 2.9985, "step": 2265500 }, { "epoch": 14.23, "learning_rate": 1.4442647963732717e-05, "loss": 2.9998, "step": 2266000 }, { "epoch": 14.23, "learning_rate": 1.4434799261594103e-05, "loss": 3.0001, "step": 2266500 }, { "epoch": 14.23, "learning_rate": 1.4426950559455491e-05, "loss": 3.0058, "step": 2267000 }, { "epoch": 14.24, "learning_rate": 1.4419101857316874e-05, "loss": 2.9986, "step": 2267500 }, { "epoch": 14.24, "learning_rate": 1.4411253155178262e-05, "loss": 3.0023, "step": 2268000 }, { "epoch": 14.24, "learning_rate": 1.4403420150443922e-05, "loss": 2.9983, "step": 2268500 }, { "epoch": 14.25, "learning_rate": 1.4395587145709585e-05, "loss": 3.0061, "step": 2269000 }, { "epoch": 14.25, "learning_rate": 1.4387738443570971e-05, "loss": 3.003, "step": 2269500 }, { "epoch": 14.25, "learning_rate": 1.4379889741432356e-05, "loss": 3.0014, "step": 2270000 }, { "epoch": 14.26, "learning_rate": 1.4372041039293743e-05, "loss": 3.0001, "step": 2270500 }, { "epoch": 14.26, "learning_rate": 1.4364192337155127e-05, "loss": 2.9991, "step": 2271000 }, { "epoch": 14.26, "learning_rate": 1.4356343635016514e-05, "loss": 3.0048, "step": 2271500 }, { "epoch": 14.27, "learning_rate": 1.4348494932877902e-05, "loss": 2.9992, "step": 2272000 }, { "epoch": 14.27, "learning_rate": 1.4340646230739285e-05, "loss": 2.9948, "step": 2272500 }, { "epoch": 14.27, "learning_rate": 1.4332797528600671e-05, "loss": 2.9928, "step": 2273000 }, { "epoch": 14.28, "learning_rate": 1.4324948826462056e-05, "loss": 3.003, "step": 2273500 }, { "epoch": 14.28, "learning_rate": 1.431711582172772e-05, "loss": 2.9937, "step": 2274000 }, { "epoch": 14.28, "learning_rate": 1.4309267119589106e-05, "loss": 2.9964, "step": 2274500 }, { "epoch": 14.28, "learning_rate": 1.430141841745049e-05, "loss": 3.0037, "step": 2275000 }, { "epoch": 14.29, "learning_rate": 1.4293569715311877e-05, "loss": 2.9993, "step": 2275500 }, { "epoch": 14.29, "learning_rate": 1.4285721013173262e-05, "loss": 3.0022, "step": 2276000 }, { "epoch": 14.29, "learning_rate": 1.4277903705843201e-05, "loss": 3.0046, "step": 2276500 }, { "epoch": 14.3, "learning_rate": 1.4270055003704588e-05, "loss": 3.001, "step": 2277000 }, { "epoch": 14.3, "learning_rate": 1.4262206301565972e-05, "loss": 3.0079, "step": 2277500 }, { "epoch": 14.3, "learning_rate": 1.4254357599427359e-05, "loss": 3.0007, "step": 2278000 }, { "epoch": 14.31, "learning_rate": 1.4246508897288745e-05, "loss": 3.0065, "step": 2278500 }, { "epoch": 14.31, "learning_rate": 1.423866019515013e-05, "loss": 3.0039, "step": 2279000 }, { "epoch": 14.31, "learning_rate": 1.4230811493011517e-05, "loss": 3.0024, "step": 2279500 }, { "epoch": 14.32, "learning_rate": 1.4222962790872901e-05, "loss": 2.9999, "step": 2280000 }, { "epoch": 14.32, "eval_accuracy": 0.45834529115273703, "eval_loss": 2.9296443462371826, "eval_runtime": 1449.2743, "eval_samples_per_second": 57.823, "eval_steps_per_second": 5.783, "step": 2280000 }, { "epoch": 14.32, "learning_rate": 1.4215129786138564e-05, "loss": 2.9966, "step": 2280500 }, { "epoch": 14.32, "learning_rate": 1.420728108399995e-05, "loss": 2.997, "step": 2281000 }, { "epoch": 14.33, "learning_rate": 1.4199432381861336e-05, "loss": 3.0006, "step": 2281500 }, { "epoch": 14.33, "learning_rate": 1.4191583679722722e-05, "loss": 3.0042, "step": 2282000 }, { "epoch": 14.33, "learning_rate": 1.4183734977584107e-05, "loss": 2.9937, "step": 2282500 }, { "epoch": 14.33, "learning_rate": 1.4175886275445493e-05, "loss": 2.9993, "step": 2283000 }, { "epoch": 14.34, "learning_rate": 1.4168037573306878e-05, "loss": 3.0016, "step": 2283500 }, { "epoch": 14.34, "learning_rate": 1.4160188871168264e-05, "loss": 3.0005, "step": 2284000 }, { "epoch": 14.34, "learning_rate": 1.4152340169029649e-05, "loss": 2.9985, "step": 2284500 }, { "epoch": 14.35, "learning_rate": 1.4144491466891035e-05, "loss": 3.0009, "step": 2285000 }, { "epoch": 14.35, "learning_rate": 1.4136642764752422e-05, "loss": 2.9998, "step": 2285500 }, { "epoch": 14.35, "learning_rate": 1.4128794062613807e-05, "loss": 2.99, "step": 2286000 }, { "epoch": 14.36, "learning_rate": 1.412096105787947e-05, "loss": 3.0061, "step": 2286500 }, { "epoch": 14.36, "learning_rate": 1.4113112355740854e-05, "loss": 3.0034, "step": 2287000 }, { "epoch": 14.36, "learning_rate": 1.4105263653602241e-05, "loss": 3.0076, "step": 2287500 }, { "epoch": 14.37, "learning_rate": 1.4097430648867904e-05, "loss": 2.9984, "step": 2288000 }, { "epoch": 14.37, "learning_rate": 1.4089581946729289e-05, "loss": 3.001, "step": 2288500 }, { "epoch": 14.37, "learning_rate": 1.4081733244590675e-05, "loss": 2.9999, "step": 2289000 }, { "epoch": 14.38, "learning_rate": 1.4073884542452062e-05, "loss": 3.0058, "step": 2289500 }, { "epoch": 14.38, "learning_rate": 1.4066051537717723e-05, "loss": 2.9995, "step": 2290000 }, { "epoch": 14.38, "learning_rate": 1.405820283557911e-05, "loss": 3.0081, "step": 2290500 }, { "epoch": 14.39, "learning_rate": 1.4050354133440494e-05, "loss": 2.9991, "step": 2291000 }, { "epoch": 14.39, "learning_rate": 1.404250543130188e-05, "loss": 3.0008, "step": 2291500 }, { "epoch": 14.39, "learning_rate": 1.4034672426567544e-05, "loss": 3.005, "step": 2292000 }, { "epoch": 14.39, "learning_rate": 1.4026823724428928e-05, "loss": 2.9988, "step": 2292500 }, { "epoch": 14.4, "learning_rate": 1.4018975022290315e-05, "loss": 3.0007, "step": 2293000 }, { "epoch": 14.4, "learning_rate": 1.4011142017555978e-05, "loss": 3.0001, "step": 2293500 }, { "epoch": 14.4, "learning_rate": 1.4003293315417363e-05, "loss": 3.0016, "step": 2294000 }, { "epoch": 14.41, "learning_rate": 1.3995444613278749e-05, "loss": 3.0037, "step": 2294500 }, { "epoch": 14.41, "learning_rate": 1.3987595911140134e-05, "loss": 2.9978, "step": 2295000 }, { "epoch": 14.41, "learning_rate": 1.397974720900152e-05, "loss": 3.0007, "step": 2295500 }, { "epoch": 14.42, "learning_rate": 1.3971898506862907e-05, "loss": 3.0008, "step": 2296000 }, { "epoch": 14.42, "learning_rate": 1.3964049804724291e-05, "loss": 3.0035, "step": 2296500 }, { "epoch": 14.42, "learning_rate": 1.3956201102585678e-05, "loss": 3.0024, "step": 2297000 }, { "epoch": 14.43, "learning_rate": 1.3948352400447063e-05, "loss": 3.0044, "step": 2297500 }, { "epoch": 14.43, "learning_rate": 1.3940519395712726e-05, "loss": 2.9984, "step": 2298000 }, { "epoch": 14.43, "learning_rate": 1.3932670693574112e-05, "loss": 2.9997, "step": 2298500 }, { "epoch": 14.44, "learning_rate": 1.3924821991435497e-05, "loss": 3.0066, "step": 2299000 }, { "epoch": 14.44, "learning_rate": 1.3916973289296883e-05, "loss": 2.9994, "step": 2299500 }, { "epoch": 14.44, "learning_rate": 1.3909140284562545e-05, "loss": 3.0059, "step": 2300000 }, { "epoch": 14.44, "learning_rate": 1.3901291582423931e-05, "loss": 2.9997, "step": 2300500 }, { "epoch": 14.45, "learning_rate": 1.3893442880285317e-05, "loss": 2.9992, "step": 2301000 }, { "epoch": 14.45, "learning_rate": 1.3885609875550979e-05, "loss": 3.0005, "step": 2301500 }, { "epoch": 14.45, "learning_rate": 1.3877761173412365e-05, "loss": 3.0012, "step": 2302000 }, { "epoch": 14.46, "learning_rate": 1.3869912471273752e-05, "loss": 2.9988, "step": 2302500 }, { "epoch": 14.46, "learning_rate": 1.3862063769135136e-05, "loss": 3.0008, "step": 2303000 }, { "epoch": 14.46, "learning_rate": 1.3854215066996523e-05, "loss": 2.9981, "step": 2303500 }, { "epoch": 14.47, "learning_rate": 1.3846382062262184e-05, "loss": 3.0082, "step": 2304000 }, { "epoch": 14.47, "learning_rate": 1.383853336012357e-05, "loss": 3.0009, "step": 2304500 }, { "epoch": 14.47, "learning_rate": 1.3830684657984957e-05, "loss": 2.9999, "step": 2305000 }, { "epoch": 14.48, "learning_rate": 1.3822835955846342e-05, "loss": 2.9988, "step": 2305500 }, { "epoch": 14.48, "learning_rate": 1.3814987253707728e-05, "loss": 2.995, "step": 2306000 }, { "epoch": 14.48, "learning_rate": 1.3807138551569113e-05, "loss": 3.0082, "step": 2306500 }, { "epoch": 14.49, "learning_rate": 1.37992898494305e-05, "loss": 3.0044, "step": 2307000 }, { "epoch": 14.49, "learning_rate": 1.3791441147291884e-05, "loss": 3.0009, "step": 2307500 }, { "epoch": 14.49, "learning_rate": 1.378359244515327e-05, "loss": 3.0081, "step": 2308000 }, { "epoch": 14.49, "learning_rate": 1.3775743743014655e-05, "loss": 3.0023, "step": 2308500 }, { "epoch": 14.5, "learning_rate": 1.3767895040876042e-05, "loss": 3.0102, "step": 2309000 }, { "epoch": 14.5, "learning_rate": 1.3760062036141705e-05, "loss": 3.007, "step": 2309500 }, { "epoch": 14.5, "learning_rate": 1.375221333400309e-05, "loss": 3.0021, "step": 2310000 }, { "epoch": 14.5, "eval_accuracy": 0.45851230693932166, "eval_loss": 2.927654266357422, "eval_runtime": 1451.0707, "eval_samples_per_second": 57.751, "eval_steps_per_second": 5.776, "step": 2310000 }, { "epoch": 14.51, "learning_rate": 1.3744364631864476e-05, "loss": 3.0024, "step": 2310500 }, { "epoch": 14.51, "learning_rate": 1.3736515929725859e-05, "loss": 3.0053, "step": 2311000 }, { "epoch": 14.51, "learning_rate": 1.3728667227587247e-05, "loss": 3.0086, "step": 2311500 }, { "epoch": 14.52, "learning_rate": 1.372083422285291e-05, "loss": 3.0006, "step": 2312000 }, { "epoch": 14.52, "learning_rate": 1.3712985520714295e-05, "loss": 3.0067, "step": 2312500 }, { "epoch": 14.52, "learning_rate": 1.3705136818575682e-05, "loss": 3.007, "step": 2313000 }, { "epoch": 14.53, "learning_rate": 1.3697303813841345e-05, "loss": 3.0085, "step": 2313500 }, { "epoch": 14.53, "learning_rate": 1.368945511170273e-05, "loss": 3.0037, "step": 2314000 }, { "epoch": 14.53, "learning_rate": 1.3681606409564116e-05, "loss": 3.0062, "step": 2314500 }, { "epoch": 14.54, "learning_rate": 1.36737577074255e-05, "loss": 3.0022, "step": 2315000 }, { "epoch": 14.54, "learning_rate": 1.3665909005286887e-05, "loss": 3.0025, "step": 2315500 }, { "epoch": 14.54, "learning_rate": 1.365806030314827e-05, "loss": 3.0035, "step": 2316000 }, { "epoch": 14.55, "learning_rate": 1.3650211601009658e-05, "loss": 3.0021, "step": 2316500 }, { "epoch": 14.55, "learning_rate": 1.3642362898871045e-05, "loss": 2.9961, "step": 2317000 }, { "epoch": 14.55, "learning_rate": 1.3634529894136704e-05, "loss": 2.9994, "step": 2317500 }, { "epoch": 14.55, "learning_rate": 1.3626681191998092e-05, "loss": 2.9985, "step": 2318000 }, { "epoch": 14.56, "learning_rate": 1.3618832489859479e-05, "loss": 3.003, "step": 2318500 }, { "epoch": 14.56, "learning_rate": 1.3610983787720864e-05, "loss": 2.999, "step": 2319000 }, { "epoch": 14.56, "learning_rate": 1.360313508558225e-05, "loss": 3.0007, "step": 2319500 }, { "epoch": 14.57, "learning_rate": 1.3595286383443635e-05, "loss": 3.0044, "step": 2320000 }, { "epoch": 14.57, "learning_rate": 1.3587453378709298e-05, "loss": 2.9933, "step": 2320500 }, { "epoch": 14.57, "learning_rate": 1.3579604676570684e-05, "loss": 2.9984, "step": 2321000 }, { "epoch": 14.58, "learning_rate": 1.3571755974432069e-05, "loss": 3.0007, "step": 2321500 }, { "epoch": 14.58, "learning_rate": 1.3563907272293455e-05, "loss": 3.003, "step": 2322000 }, { "epoch": 14.58, "learning_rate": 1.3556058570154838e-05, "loss": 3.0029, "step": 2322500 }, { "epoch": 14.59, "learning_rate": 1.3548209868016227e-05, "loss": 3.0017, "step": 2323000 }, { "epoch": 14.59, "learning_rate": 1.354036116587761e-05, "loss": 2.9971, "step": 2323500 }, { "epoch": 14.59, "learning_rate": 1.3532512463738998e-05, "loss": 2.9959, "step": 2324000 }, { "epoch": 14.6, "learning_rate": 1.352467945900466e-05, "loss": 2.9979, "step": 2324500 }, { "epoch": 14.6, "learning_rate": 1.35168621516746e-05, "loss": 3.0066, "step": 2325000 }, { "epoch": 14.6, "learning_rate": 1.3509013449535984e-05, "loss": 3.0051, "step": 2325500 }, { "epoch": 14.6, "learning_rate": 1.3501164747397372e-05, "loss": 3.0028, "step": 2326000 }, { "epoch": 14.61, "learning_rate": 1.3493316045258755e-05, "loss": 3.0042, "step": 2326500 }, { "epoch": 14.61, "learning_rate": 1.3485467343120143e-05, "loss": 3.001, "step": 2327000 }, { "epoch": 14.61, "learning_rate": 1.347761864098153e-05, "loss": 3.0091, "step": 2327500 }, { "epoch": 14.62, "learning_rate": 1.3469769938842914e-05, "loss": 3.0062, "step": 2328000 }, { "epoch": 14.62, "learning_rate": 1.34619212367043e-05, "loss": 3.0094, "step": 2328500 }, { "epoch": 14.62, "learning_rate": 1.3454072534565684e-05, "loss": 3.0074, "step": 2329000 }, { "epoch": 14.63, "learning_rate": 1.3446239529831348e-05, "loss": 3.0057, "step": 2329500 }, { "epoch": 14.63, "learning_rate": 1.3438390827692735e-05, "loss": 3.0021, "step": 2330000 }, { "epoch": 14.63, "learning_rate": 1.3430557822958394e-05, "loss": 3.0043, "step": 2330500 }, { "epoch": 14.64, "learning_rate": 1.3422709120819783e-05, "loss": 2.9973, "step": 2331000 }, { "epoch": 14.64, "learning_rate": 1.3414860418681169e-05, "loss": 2.9963, "step": 2331500 }, { "epoch": 14.64, "learning_rate": 1.3407011716542554e-05, "loss": 3.0083, "step": 2332000 }, { "epoch": 14.65, "learning_rate": 1.339916301440394e-05, "loss": 2.9957, "step": 2332500 }, { "epoch": 14.65, "learning_rate": 1.3391314312265323e-05, "loss": 3.0059, "step": 2333000 }, { "epoch": 14.65, "learning_rate": 1.3383465610126711e-05, "loss": 3.0059, "step": 2333500 }, { "epoch": 14.66, "learning_rate": 1.3375632605392374e-05, "loss": 2.9985, "step": 2334000 }, { "epoch": 14.66, "learning_rate": 1.3367783903253759e-05, "loss": 3.0068, "step": 2334500 }, { "epoch": 14.66, "learning_rate": 1.3359935201115146e-05, "loss": 3.0008, "step": 2335000 }, { "epoch": 14.66, "learning_rate": 1.3352086498976529e-05, "loss": 3.0008, "step": 2335500 }, { "epoch": 14.67, "learning_rate": 1.3344237796837917e-05, "loss": 3.0028, "step": 2336000 }, { "epoch": 14.67, "learning_rate": 1.33363890946993e-05, "loss": 3.0007, "step": 2336500 }, { "epoch": 14.67, "learning_rate": 1.3328540392560688e-05, "loss": 3.0099, "step": 2337000 }, { "epoch": 14.68, "learning_rate": 1.3320707387826351e-05, "loss": 3.0065, "step": 2337500 }, { "epoch": 14.68, "learning_rate": 1.3312858685687734e-05, "loss": 3.0041, "step": 2338000 }, { "epoch": 14.68, "learning_rate": 1.3305009983549122e-05, "loss": 2.9991, "step": 2338500 }, { "epoch": 14.69, "learning_rate": 1.3297161281410505e-05, "loss": 3.006, "step": 2339000 }, { "epoch": 14.69, "learning_rate": 1.3289312579271893e-05, "loss": 3.0021, "step": 2339500 }, { "epoch": 14.69, "learning_rate": 1.3281479574537556e-05, "loss": 3.0027, "step": 2340000 }, { "epoch": 14.69, "eval_accuracy": 0.45873694655505326, "eval_loss": 2.925776720046997, "eval_runtime": 1451.6631, "eval_samples_per_second": 57.728, "eval_steps_per_second": 5.773, "step": 2340000 }, { "epoch": 14.7, "learning_rate": 1.327363087239894e-05, "loss": 3.0033, "step": 2340500 }, { "epoch": 14.7, "learning_rate": 1.3265782170260328e-05, "loss": 3.0021, "step": 2341000 }, { "epoch": 14.7, "learning_rate": 1.325793346812171e-05, "loss": 3.0017, "step": 2341500 }, { "epoch": 14.71, "learning_rate": 1.3250084765983099e-05, "loss": 3.0018, "step": 2342000 }, { "epoch": 14.71, "learning_rate": 1.3242236063844482e-05, "loss": 3.0048, "step": 2342500 }, { "epoch": 14.71, "learning_rate": 1.3234387361705868e-05, "loss": 3.0053, "step": 2343000 }, { "epoch": 14.71, "learning_rate": 1.3226538659567256e-05, "loss": 3.0005, "step": 2343500 }, { "epoch": 14.72, "learning_rate": 1.321868995742864e-05, "loss": 3.0058, "step": 2344000 }, { "epoch": 14.72, "learning_rate": 1.3210841255290028e-05, "loss": 3.0061, "step": 2344500 }, { "epoch": 14.72, "learning_rate": 1.320300825055569e-05, "loss": 2.9967, "step": 2345000 }, { "epoch": 14.73, "learning_rate": 1.319517524582135e-05, "loss": 3.0004, "step": 2345500 }, { "epoch": 14.73, "learning_rate": 1.3187326543682738e-05, "loss": 3.0022, "step": 2346000 }, { "epoch": 14.73, "learning_rate": 1.3179477841544121e-05, "loss": 2.9978, "step": 2346500 }, { "epoch": 14.74, "learning_rate": 1.3171629139405508e-05, "loss": 3.0082, "step": 2347000 }, { "epoch": 14.74, "learning_rate": 1.3163780437266896e-05, "loss": 3.0023, "step": 2347500 }, { "epoch": 14.74, "learning_rate": 1.3155931735128279e-05, "loss": 2.9934, "step": 2348000 }, { "epoch": 14.75, "learning_rate": 1.3148083032989667e-05, "loss": 2.9988, "step": 2348500 }, { "epoch": 14.75, "learning_rate": 1.314023433085105e-05, "loss": 3.0021, "step": 2349000 }, { "epoch": 14.75, "learning_rate": 1.3132385628712437e-05, "loss": 3.0055, "step": 2349500 }, { "epoch": 14.76, "learning_rate": 1.3124552623978101e-05, "loss": 2.996, "step": 2350000 }, { "epoch": 14.76, "learning_rate": 1.3116703921839484e-05, "loss": 2.9974, "step": 2350500 }, { "epoch": 14.76, "learning_rate": 1.3108855219700873e-05, "loss": 3.0056, "step": 2351000 }, { "epoch": 14.76, "learning_rate": 1.3101006517562256e-05, "loss": 3.0006, "step": 2351500 }, { "epoch": 14.77, "learning_rate": 1.3093157815423642e-05, "loss": 3.0069, "step": 2352000 }, { "epoch": 14.77, "learning_rate": 1.3085309113285027e-05, "loss": 2.9986, "step": 2352500 }, { "epoch": 14.77, "learning_rate": 1.307747610855069e-05, "loss": 3.0005, "step": 2353000 }, { "epoch": 14.78, "learning_rate": 1.3069627406412078e-05, "loss": 3.006, "step": 2353500 }, { "epoch": 14.78, "learning_rate": 1.3061778704273461e-05, "loss": 3.012, "step": 2354000 }, { "epoch": 14.78, "learning_rate": 1.3053930002134847e-05, "loss": 3.0025, "step": 2354500 }, { "epoch": 14.79, "learning_rate": 1.3046081299996232e-05, "loss": 3.0044, "step": 2355000 }, { "epoch": 14.79, "learning_rate": 1.3038232597857619e-05, "loss": 3.005, "step": 2355500 }, { "epoch": 14.79, "learning_rate": 1.3030383895719003e-05, "loss": 3.0063, "step": 2356000 }, { "epoch": 14.8, "learning_rate": 1.302253519358039e-05, "loss": 2.999, "step": 2356500 }, { "epoch": 14.8, "learning_rate": 1.301471788625033e-05, "loss": 2.9956, "step": 2357000 }, { "epoch": 14.8, "learning_rate": 1.3006869184111718e-05, "loss": 3.0091, "step": 2357500 }, { "epoch": 14.81, "learning_rate": 1.29990204819731e-05, "loss": 3.0051, "step": 2358000 }, { "epoch": 14.81, "learning_rate": 1.2991171779834487e-05, "loss": 2.9997, "step": 2358500 }, { "epoch": 14.81, "learning_rate": 1.2983323077695872e-05, "loss": 3.0035, "step": 2359000 }, { "epoch": 14.82, "learning_rate": 1.2975474375557258e-05, "loss": 2.9974, "step": 2359500 }, { "epoch": 14.82, "learning_rate": 1.2967641370822921e-05, "loss": 3.0051, "step": 2360000 }, { "epoch": 14.82, "learning_rate": 1.2959792668684306e-05, "loss": 3.0027, "step": 2360500 }, { "epoch": 14.82, "learning_rate": 1.2951943966545693e-05, "loss": 2.9983, "step": 2361000 }, { "epoch": 14.83, "learning_rate": 1.2944095264407077e-05, "loss": 3.0066, "step": 2361500 }, { "epoch": 14.83, "learning_rate": 1.2936246562268464e-05, "loss": 3.0094, "step": 2362000 }, { "epoch": 14.83, "learning_rate": 1.2928413557534127e-05, "loss": 3.0031, "step": 2362500 }, { "epoch": 14.84, "learning_rate": 1.2920564855395512e-05, "loss": 3.0044, "step": 2363000 }, { "epoch": 14.84, "learning_rate": 1.2912716153256898e-05, "loss": 2.9976, "step": 2363500 }, { "epoch": 14.84, "learning_rate": 1.2904867451118283e-05, "loss": 3.0023, "step": 2364000 }, { "epoch": 14.85, "learning_rate": 1.2897034446383946e-05, "loss": 2.9943, "step": 2364500 }, { "epoch": 14.85, "learning_rate": 1.2889185744245332e-05, "loss": 2.9994, "step": 2365000 }, { "epoch": 14.85, "learning_rate": 1.2881337042106717e-05, "loss": 3.0117, "step": 2365500 }, { "epoch": 14.86, "learning_rate": 1.2873488339968103e-05, "loss": 3.0014, "step": 2366000 }, { "epoch": 14.86, "learning_rate": 1.2865639637829488e-05, "loss": 2.9937, "step": 2366500 }, { "epoch": 14.86, "learning_rate": 1.2857790935690875e-05, "loss": 3.0049, "step": 2367000 }, { "epoch": 14.87, "learning_rate": 1.2849957930956538e-05, "loss": 3.003, "step": 2367500 }, { "epoch": 14.87, "learning_rate": 1.2842109228817922e-05, "loss": 3.0076, "step": 2368000 }, { "epoch": 14.87, "learning_rate": 1.2834260526679309e-05, "loss": 3.0036, "step": 2368500 }, { "epoch": 14.87, "learning_rate": 1.2826411824540694e-05, "loss": 2.9986, "step": 2369000 }, { "epoch": 14.88, "learning_rate": 1.281856312240208e-05, "loss": 3.0023, "step": 2369500 }, { "epoch": 14.88, "learning_rate": 1.2810714420263466e-05, "loss": 3.0018, "step": 2370000 }, { "epoch": 14.88, "eval_accuracy": 0.4589703463925844, "eval_loss": 2.924165964126587, "eval_runtime": 1449.5855, "eval_samples_per_second": 57.81, "eval_steps_per_second": 5.782, "step": 2370000 }, { "epoch": 14.88, "learning_rate": 1.2802881415529128e-05, "loss": 3.0026, "step": 2370500 }, { "epoch": 14.89, "learning_rate": 1.2795032713390514e-05, "loss": 3.0047, "step": 2371000 }, { "epoch": 14.89, "learning_rate": 1.2787184011251899e-05, "loss": 3.0046, "step": 2371500 }, { "epoch": 14.89, "learning_rate": 1.2779351006517562e-05, "loss": 3.0042, "step": 2372000 }, { "epoch": 14.9, "learning_rate": 1.2771518001783225e-05, "loss": 3.0011, "step": 2372500 }, { "epoch": 14.9, "learning_rate": 1.2763669299644612e-05, "loss": 2.9945, "step": 2373000 }, { "epoch": 14.9, "learning_rate": 1.2755820597505996e-05, "loss": 2.9951, "step": 2373500 }, { "epoch": 14.91, "learning_rate": 1.2747971895367383e-05, "loss": 2.9984, "step": 2374000 }, { "epoch": 14.91, "learning_rate": 1.2740123193228767e-05, "loss": 2.9996, "step": 2374500 }, { "epoch": 14.91, "learning_rate": 1.2732274491090154e-05, "loss": 3.01, "step": 2375000 }, { "epoch": 14.92, "learning_rate": 1.2724425788951539e-05, "loss": 3.0015, "step": 2375500 }, { "epoch": 14.92, "learning_rate": 1.2716577086812925e-05, "loss": 2.9954, "step": 2376000 }, { "epoch": 14.92, "learning_rate": 1.2708728384674312e-05, "loss": 2.9959, "step": 2376500 }, { "epoch": 14.93, "learning_rate": 1.2700879682535696e-05, "loss": 3.0046, "step": 2377000 }, { "epoch": 14.93, "learning_rate": 1.2693030980397083e-05, "loss": 2.9982, "step": 2377500 }, { "epoch": 14.93, "learning_rate": 1.2685182278258467e-05, "loss": 3.0075, "step": 2378000 }, { "epoch": 14.93, "learning_rate": 1.2677333576119854e-05, "loss": 3.0032, "step": 2378500 }, { "epoch": 14.94, "learning_rate": 1.2669484873981239e-05, "loss": 3.0015, "step": 2379000 }, { "epoch": 14.94, "learning_rate": 1.2661651869246902e-05, "loss": 3.0037, "step": 2379500 }, { "epoch": 14.94, "learning_rate": 1.2653803167108288e-05, "loss": 3.0025, "step": 2380000 }, { "epoch": 14.95, "learning_rate": 1.2645954464969673e-05, "loss": 3.0093, "step": 2380500 }, { "epoch": 14.95, "learning_rate": 1.263810576283106e-05, "loss": 3.0037, "step": 2381000 }, { "epoch": 14.95, "learning_rate": 1.2630257060692444e-05, "loss": 2.9991, "step": 2381500 }, { "epoch": 14.96, "learning_rate": 1.2622424055958107e-05, "loss": 2.9978, "step": 2382000 }, { "epoch": 14.96, "learning_rate": 1.2614575353819494e-05, "loss": 3.0107, "step": 2382500 }, { "epoch": 14.96, "learning_rate": 1.2606726651680878e-05, "loss": 3.0069, "step": 2383000 }, { "epoch": 14.97, "learning_rate": 1.2598893646946541e-05, "loss": 2.9995, "step": 2383500 }, { "epoch": 14.97, "learning_rate": 1.2591044944807928e-05, "loss": 3.0017, "step": 2384000 }, { "epoch": 14.97, "learning_rate": 1.2583196242669313e-05, "loss": 3.0019, "step": 2384500 }, { "epoch": 14.98, "learning_rate": 1.2575347540530699e-05, "loss": 3.0051, "step": 2385000 }, { "epoch": 14.98, "learning_rate": 1.2567498838392084e-05, "loss": 3.0029, "step": 2385500 }, { "epoch": 14.98, "learning_rate": 1.255965013625347e-05, "loss": 2.9914, "step": 2386000 }, { "epoch": 14.98, "learning_rate": 1.2551801434114855e-05, "loss": 3.0023, "step": 2386500 }, { "epoch": 14.99, "learning_rate": 1.2543968429380518e-05, "loss": 2.9942, "step": 2387000 }, { "epoch": 14.99, "learning_rate": 1.2536119727241904e-05, "loss": 2.9992, "step": 2387500 }, { "epoch": 14.99, "learning_rate": 1.2528271025103289e-05, "loss": 3.0093, "step": 2388000 }, { "epoch": 15.0, "learning_rate": 1.2520422322964676e-05, "loss": 3.0051, "step": 2388500 }, { "epoch": 15.0, "learning_rate": 1.251257362082606e-05, "loss": 3.003, "step": 2389000 }, { "epoch": 15.0, "learning_rate": 1.2504724918687447e-05, "loss": 2.9924, "step": 2389500 }, { "epoch": 15.01, "learning_rate": 1.2496876216548831e-05, "loss": 2.9931, "step": 2390000 }, { "epoch": 15.01, "learning_rate": 1.2489027514410218e-05, "loss": 2.9798, "step": 2390500 }, { "epoch": 15.01, "learning_rate": 1.2481178812271603e-05, "loss": 2.986, "step": 2391000 }, { "epoch": 15.02, "learning_rate": 1.2473330110132989e-05, "loss": 2.9815, "step": 2391500 }, { "epoch": 15.02, "learning_rate": 1.2465481407994374e-05, "loss": 2.9922, "step": 2392000 }, { "epoch": 15.02, "learning_rate": 1.2457648403260037e-05, "loss": 2.9855, "step": 2392500 }, { "epoch": 15.03, "learning_rate": 1.2449799701121423e-05, "loss": 2.9887, "step": 2393000 }, { "epoch": 15.03, "learning_rate": 1.2441950998982808e-05, "loss": 2.9848, "step": 2393500 }, { "epoch": 15.03, "learning_rate": 1.2434102296844194e-05, "loss": 2.99, "step": 2394000 }, { "epoch": 15.03, "learning_rate": 1.2426269292109858e-05, "loss": 2.984, "step": 2394500 }, { "epoch": 15.04, "learning_rate": 1.2418420589971242e-05, "loss": 2.9888, "step": 2395000 }, { "epoch": 15.04, "learning_rate": 1.2410587585236905e-05, "loss": 2.9875, "step": 2395500 }, { "epoch": 15.04, "learning_rate": 1.2402738883098292e-05, "loss": 2.9824, "step": 2396000 }, { "epoch": 15.05, "learning_rate": 1.2394890180959677e-05, "loss": 2.9884, "step": 2396500 }, { "epoch": 15.05, "learning_rate": 1.2387041478821063e-05, "loss": 2.9861, "step": 2397000 }, { "epoch": 15.05, "learning_rate": 1.2379192776682448e-05, "loss": 2.9799, "step": 2397500 }, { "epoch": 15.06, "learning_rate": 1.2371344074543834e-05, "loss": 2.9915, "step": 2398000 }, { "epoch": 15.06, "learning_rate": 1.2363495372405219e-05, "loss": 2.9891, "step": 2398500 }, { "epoch": 15.06, "learning_rate": 1.2355646670266605e-05, "loss": 2.986, "step": 2399000 }, { "epoch": 15.07, "learning_rate": 1.2347813665532268e-05, "loss": 2.9893, "step": 2399500 }, { "epoch": 15.07, "learning_rate": 1.2339964963393653e-05, "loss": 2.9896, "step": 2400000 }, { "epoch": 15.07, "eval_accuracy": 0.45912608237293717, "eval_loss": 2.9235572814941406, "eval_runtime": 1450.6843, "eval_samples_per_second": 57.767, "eval_steps_per_second": 5.777, "step": 2400000 }, { "epoch": 15.07, "learning_rate": 1.233211626125504e-05, "loss": 2.9955, "step": 2400500 }, { "epoch": 15.08, "learning_rate": 1.2324267559116424e-05, "loss": 2.9912, "step": 2401000 }, { "epoch": 15.08, "learning_rate": 1.231641885697781e-05, "loss": 2.9844, "step": 2401500 }, { "epoch": 15.08, "learning_rate": 1.230860154964775e-05, "loss": 2.9861, "step": 2402000 }, { "epoch": 15.09, "learning_rate": 1.2300752847509137e-05, "loss": 2.9822, "step": 2402500 }, { "epoch": 15.09, "learning_rate": 1.2292904145370522e-05, "loss": 2.9899, "step": 2403000 }, { "epoch": 15.09, "learning_rate": 1.2285055443231908e-05, "loss": 2.9935, "step": 2403500 }, { "epoch": 15.09, "learning_rate": 1.2277206741093293e-05, "loss": 2.9846, "step": 2404000 }, { "epoch": 15.1, "learning_rate": 1.226935803895468e-05, "loss": 2.9915, "step": 2404500 }, { "epoch": 15.1, "learning_rate": 1.2261509336816064e-05, "loss": 2.9864, "step": 2405000 }, { "epoch": 15.1, "learning_rate": 1.225366063467745e-05, "loss": 2.9893, "step": 2405500 }, { "epoch": 15.11, "learning_rate": 1.2245811932538837e-05, "loss": 2.9917, "step": 2406000 }, { "epoch": 15.11, "learning_rate": 1.2237963230400222e-05, "loss": 2.9897, "step": 2406500 }, { "epoch": 15.11, "learning_rate": 1.2230114528261608e-05, "loss": 2.991, "step": 2407000 }, { "epoch": 15.12, "learning_rate": 1.222228152352727e-05, "loss": 2.9911, "step": 2407500 }, { "epoch": 15.12, "learning_rate": 1.2214432821388656e-05, "loss": 2.9958, "step": 2408000 }, { "epoch": 15.12, "learning_rate": 1.2206584119250042e-05, "loss": 2.9959, "step": 2408500 }, { "epoch": 15.13, "learning_rate": 1.2198735417111427e-05, "loss": 2.9869, "step": 2409000 }, { "epoch": 15.13, "learning_rate": 1.2190886714972813e-05, "loss": 2.9857, "step": 2409500 }, { "epoch": 15.13, "learning_rate": 1.2183053710238477e-05, "loss": 2.9913, "step": 2410000 }, { "epoch": 15.14, "learning_rate": 1.2175205008099861e-05, "loss": 2.9933, "step": 2410500 }, { "epoch": 15.14, "learning_rate": 1.2167356305961248e-05, "loss": 2.9927, "step": 2411000 }, { "epoch": 15.14, "learning_rate": 1.2159507603822632e-05, "loss": 2.989, "step": 2411500 }, { "epoch": 15.14, "learning_rate": 1.2151674599088295e-05, "loss": 2.9924, "step": 2412000 }, { "epoch": 15.15, "learning_rate": 1.2143841594353959e-05, "loss": 2.993, "step": 2412500 }, { "epoch": 15.15, "learning_rate": 1.2135992892215343e-05, "loss": 2.9875, "step": 2413000 }, { "epoch": 15.15, "learning_rate": 1.212814419007673e-05, "loss": 2.9947, "step": 2413500 }, { "epoch": 15.16, "learning_rate": 1.2120295487938114e-05, "loss": 2.9963, "step": 2414000 }, { "epoch": 15.16, "learning_rate": 1.2112446785799501e-05, "loss": 2.9924, "step": 2414500 }, { "epoch": 15.16, "learning_rate": 1.2104598083660887e-05, "loss": 2.9871, "step": 2415000 }, { "epoch": 15.17, "learning_rate": 1.2096749381522272e-05, "loss": 2.9931, "step": 2415500 }, { "epoch": 15.17, "learning_rate": 1.2088900679383659e-05, "loss": 2.9829, "step": 2416000 }, { "epoch": 15.17, "learning_rate": 1.2081051977245043e-05, "loss": 2.9869, "step": 2416500 }, { "epoch": 15.18, "learning_rate": 1.207320327510643e-05, "loss": 2.9861, "step": 2417000 }, { "epoch": 15.18, "learning_rate": 1.2065354572967814e-05, "loss": 2.9921, "step": 2417500 }, { "epoch": 15.18, "learning_rate": 1.2057537265637754e-05, "loss": 2.9881, "step": 2418000 }, { "epoch": 15.19, "learning_rate": 1.204968856349914e-05, "loss": 2.9907, "step": 2418500 }, { "epoch": 15.19, "learning_rate": 1.2041839861360527e-05, "loss": 2.9925, "step": 2419000 }, { "epoch": 15.19, "learning_rate": 1.2033991159221912e-05, "loss": 2.9919, "step": 2419500 }, { "epoch": 15.2, "learning_rate": 1.2026142457083298e-05, "loss": 2.991, "step": 2420000 }, { "epoch": 15.2, "learning_rate": 1.2018293754944683e-05, "loss": 2.9927, "step": 2420500 }, { "epoch": 15.2, "learning_rate": 1.201044505280607e-05, "loss": 2.9978, "step": 2421000 }, { "epoch": 15.2, "learning_rate": 1.2002596350667454e-05, "loss": 3.0006, "step": 2421500 }, { "epoch": 15.21, "learning_rate": 1.1994747648528839e-05, "loss": 2.9967, "step": 2422000 }, { "epoch": 15.21, "learning_rate": 1.1986914643794504e-05, "loss": 2.9903, "step": 2422500 }, { "epoch": 15.21, "learning_rate": 1.1979065941655888e-05, "loss": 2.999, "step": 2423000 }, { "epoch": 15.22, "learning_rate": 1.1971217239517273e-05, "loss": 2.9862, "step": 2423500 }, { "epoch": 15.22, "learning_rate": 1.196336853737866e-05, "loss": 2.9986, "step": 2424000 }, { "epoch": 15.22, "learning_rate": 1.19555512300486e-05, "loss": 2.9931, "step": 2424500 }, { "epoch": 15.23, "learning_rate": 1.1947702527909986e-05, "loss": 2.9913, "step": 2425000 }, { "epoch": 15.23, "learning_rate": 1.1939853825771372e-05, "loss": 2.9915, "step": 2425500 }, { "epoch": 15.23, "learning_rate": 1.1932005123632757e-05, "loss": 2.9982, "step": 2426000 }, { "epoch": 15.24, "learning_rate": 1.1924156421494143e-05, "loss": 2.9934, "step": 2426500 }, { "epoch": 15.24, "learning_rate": 1.1916307719355528e-05, "loss": 2.9873, "step": 2427000 }, { "epoch": 15.24, "learning_rate": 1.1908459017216914e-05, "loss": 2.9911, "step": 2427500 }, { "epoch": 15.25, "learning_rate": 1.19006103150783e-05, "loss": 2.9898, "step": 2428000 }, { "epoch": 15.25, "learning_rate": 1.1892761612939684e-05, "loss": 2.9935, "step": 2428500 }, { "epoch": 15.25, "learning_rate": 1.1884928608205349e-05, "loss": 2.9863, "step": 2429000 }, { "epoch": 15.25, "learning_rate": 1.1877079906066733e-05, "loss": 2.9945, "step": 2429500 }, { "epoch": 15.26, "learning_rate": 1.1869231203928118e-05, "loss": 2.9959, "step": 2430000 }, { "epoch": 15.26, "eval_accuracy": 0.4594209320752348, "eval_loss": 2.9216904640197754, "eval_runtime": 1450.6747, "eval_samples_per_second": 57.767, "eval_steps_per_second": 5.777, "step": 2430000 }, { "epoch": 15.26, "learning_rate": 1.1861382501789505e-05, "loss": 2.9905, "step": 2430500 }, { "epoch": 15.26, "learning_rate": 1.185353379965089e-05, "loss": 2.995, "step": 2431000 }, { "epoch": 15.27, "learning_rate": 1.1845700794916554e-05, "loss": 2.9931, "step": 2431500 }, { "epoch": 15.27, "learning_rate": 1.1837852092777939e-05, "loss": 2.9924, "step": 2432000 }, { "epoch": 15.27, "learning_rate": 1.1830003390639324e-05, "loss": 2.9912, "step": 2432500 }, { "epoch": 15.28, "learning_rate": 1.182215468850071e-05, "loss": 2.9954, "step": 2433000 }, { "epoch": 15.28, "learning_rate": 1.1814305986362095e-05, "loss": 2.9907, "step": 2433500 }, { "epoch": 15.28, "learning_rate": 1.1806457284223481e-05, "loss": 2.989, "step": 2434000 }, { "epoch": 15.29, "learning_rate": 1.1798624279489144e-05, "loss": 2.9925, "step": 2434500 }, { "epoch": 15.29, "learning_rate": 1.1790775577350529e-05, "loss": 2.9958, "step": 2435000 }, { "epoch": 15.29, "learning_rate": 1.1782926875211915e-05, "loss": 2.9856, "step": 2435500 }, { "epoch": 15.3, "learning_rate": 1.17750781730733e-05, "loss": 2.999, "step": 2436000 }, { "epoch": 15.3, "learning_rate": 1.1767229470934687e-05, "loss": 2.9967, "step": 2436500 }, { "epoch": 15.3, "learning_rate": 1.1759412163604628e-05, "loss": 2.9976, "step": 2437000 }, { "epoch": 15.3, "learning_rate": 1.1751563461466013e-05, "loss": 2.9931, "step": 2437500 }, { "epoch": 15.31, "learning_rate": 1.1743714759327397e-05, "loss": 2.9897, "step": 2438000 }, { "epoch": 15.31, "learning_rate": 1.1735866057188784e-05, "loss": 2.9953, "step": 2438500 }, { "epoch": 15.31, "learning_rate": 1.1728017355050169e-05, "loss": 2.9892, "step": 2439000 }, { "epoch": 15.32, "learning_rate": 1.1720168652911555e-05, "loss": 2.9915, "step": 2439500 }, { "epoch": 15.32, "learning_rate": 1.171231995077294e-05, "loss": 2.9911, "step": 2440000 }, { "epoch": 15.32, "learning_rate": 1.1704471248634326e-05, "loss": 2.9871, "step": 2440500 }, { "epoch": 15.33, "learning_rate": 1.1696622546495713e-05, "loss": 2.99, "step": 2441000 }, { "epoch": 15.33, "learning_rate": 1.1688773844357097e-05, "loss": 2.9954, "step": 2441500 }, { "epoch": 15.33, "learning_rate": 1.168094083962276e-05, "loss": 2.9886, "step": 2442000 }, { "epoch": 15.34, "learning_rate": 1.1673092137484145e-05, "loss": 2.9891, "step": 2442500 }, { "epoch": 15.34, "learning_rate": 1.1665243435345532e-05, "loss": 2.9966, "step": 2443000 }, { "epoch": 15.34, "learning_rate": 1.1657394733206918e-05, "loss": 2.9912, "step": 2443500 }, { "epoch": 15.35, "learning_rate": 1.1649546031068303e-05, "loss": 2.9966, "step": 2444000 }, { "epoch": 15.35, "learning_rate": 1.1641713026333966e-05, "loss": 2.9968, "step": 2444500 }, { "epoch": 15.35, "learning_rate": 1.163386432419535e-05, "loss": 2.9909, "step": 2445000 }, { "epoch": 15.36, "learning_rate": 1.1626015622056737e-05, "loss": 2.9892, "step": 2445500 }, { "epoch": 15.36, "learning_rate": 1.1618166919918124e-05, "loss": 2.9989, "step": 2446000 }, { "epoch": 15.36, "learning_rate": 1.1610318217779508e-05, "loss": 2.993, "step": 2446500 }, { "epoch": 15.36, "learning_rate": 1.1602469515640895e-05, "loss": 2.9861, "step": 2447000 }, { "epoch": 15.37, "learning_rate": 1.1594636510906558e-05, "loss": 2.9918, "step": 2447500 }, { "epoch": 15.37, "learning_rate": 1.1586787808767943e-05, "loss": 2.9911, "step": 2448000 }, { "epoch": 15.37, "learning_rate": 1.1578939106629329e-05, "loss": 2.9918, "step": 2448500 }, { "epoch": 15.38, "learning_rate": 1.1571090404490714e-05, "loss": 2.993, "step": 2449000 }, { "epoch": 15.38, "learning_rate": 1.1563257399756377e-05, "loss": 2.9943, "step": 2449500 }, { "epoch": 15.38, "learning_rate": 1.1555408697617763e-05, "loss": 2.9925, "step": 2450000 }, { "epoch": 15.39, "learning_rate": 1.1547559995479148e-05, "loss": 2.9889, "step": 2450500 }, { "epoch": 15.39, "learning_rate": 1.1539711293340534e-05, "loss": 2.9926, "step": 2451000 }, { "epoch": 15.39, "learning_rate": 1.1531878288606196e-05, "loss": 2.9962, "step": 2451500 }, { "epoch": 15.4, "learning_rate": 1.1524029586467582e-05, "loss": 2.9951, "step": 2452000 }, { "epoch": 15.4, "learning_rate": 1.1516180884328969e-05, "loss": 2.9903, "step": 2452500 }, { "epoch": 15.4, "learning_rate": 1.1508332182190353e-05, "loss": 2.9931, "step": 2453000 }, { "epoch": 15.41, "learning_rate": 1.150048348005174e-05, "loss": 2.9934, "step": 2453500 }, { "epoch": 15.41, "learning_rate": 1.1492634777913125e-05, "loss": 2.9954, "step": 2454000 }, { "epoch": 15.41, "learning_rate": 1.1484817470583064e-05, "loss": 2.9914, "step": 2454500 }, { "epoch": 15.41, "learning_rate": 1.147696876844445e-05, "loss": 2.9905, "step": 2455000 }, { "epoch": 15.42, "learning_rate": 1.1469120066305835e-05, "loss": 2.9887, "step": 2455500 }, { "epoch": 15.42, "learning_rate": 1.1461271364167222e-05, "loss": 2.9921, "step": 2456000 }, { "epoch": 15.42, "learning_rate": 1.1453422662028608e-05, "loss": 2.9914, "step": 2456500 }, { "epoch": 15.43, "learning_rate": 1.1445573959889993e-05, "loss": 2.9862, "step": 2457000 }, { "epoch": 15.43, "learning_rate": 1.143772525775138e-05, "loss": 2.9914, "step": 2457500 }, { "epoch": 15.43, "learning_rate": 1.1429876555612764e-05, "loss": 2.9922, "step": 2458000 }, { "epoch": 15.44, "learning_rate": 1.142202785347415e-05, "loss": 2.9892, "step": 2458500 }, { "epoch": 15.44, "learning_rate": 1.1414179151335535e-05, "loss": 2.9941, "step": 2459000 }, { "epoch": 15.44, "learning_rate": 1.1406330449196922e-05, "loss": 2.99, "step": 2459500 }, { "epoch": 15.45, "learning_rate": 1.1398481747058307e-05, "loss": 2.9926, "step": 2460000 }, { "epoch": 15.45, "eval_accuracy": 0.4594542699099924, "eval_loss": 2.9202706813812256, "eval_runtime": 1450.987, "eval_samples_per_second": 57.754, "eval_steps_per_second": 5.776, "step": 2460000 }, { "epoch": 15.45, "learning_rate": 1.139064874232397e-05, "loss": 2.9968, "step": 2460500 }, { "epoch": 15.45, "learning_rate": 1.1382800040185356e-05, "loss": 2.996, "step": 2461000 }, { "epoch": 15.46, "learning_rate": 1.137495133804674e-05, "loss": 2.9922, "step": 2461500 }, { "epoch": 15.46, "learning_rate": 1.1367118333312404e-05, "loss": 2.9967, "step": 2462000 }, { "epoch": 15.46, "learning_rate": 1.135926963117379e-05, "loss": 2.9974, "step": 2462500 }, { "epoch": 15.47, "learning_rate": 1.1351420929035175e-05, "loss": 2.9904, "step": 2463000 }, { "epoch": 15.47, "learning_rate": 1.1343587924300838e-05, "loss": 2.9895, "step": 2463500 }, { "epoch": 15.47, "learning_rate": 1.1335754919566501e-05, "loss": 2.989, "step": 2464000 }, { "epoch": 15.47, "learning_rate": 1.1327906217427886e-05, "loss": 2.9951, "step": 2464500 }, { "epoch": 15.48, "learning_rate": 1.1320057515289272e-05, "loss": 2.999, "step": 2465000 }, { "epoch": 15.48, "learning_rate": 1.1312208813150659e-05, "loss": 2.9921, "step": 2465500 }, { "epoch": 15.48, "learning_rate": 1.1304360111012044e-05, "loss": 2.9905, "step": 2466000 }, { "epoch": 15.49, "learning_rate": 1.129651140887343e-05, "loss": 2.994, "step": 2466500 }, { "epoch": 15.49, "learning_rate": 1.1288662706734815e-05, "loss": 2.9903, "step": 2467000 }, { "epoch": 15.49, "learning_rate": 1.1280814004596201e-05, "loss": 2.9931, "step": 2467500 }, { "epoch": 15.5, "learning_rate": 1.1272965302457586e-05, "loss": 2.9921, "step": 2468000 }, { "epoch": 15.5, "learning_rate": 1.1265116600318972e-05, "loss": 2.9942, "step": 2468500 }, { "epoch": 15.5, "learning_rate": 1.1257283595584635e-05, "loss": 2.9869, "step": 2469000 }, { "epoch": 15.51, "learning_rate": 1.124943489344602e-05, "loss": 2.9938, "step": 2469500 }, { "epoch": 15.51, "learning_rate": 1.1241586191307407e-05, "loss": 2.9945, "step": 2470000 }, { "epoch": 15.51, "learning_rate": 1.1233737489168791e-05, "loss": 2.995, "step": 2470500 }, { "epoch": 15.52, "learning_rate": 1.1225888787030176e-05, "loss": 2.9904, "step": 2471000 }, { "epoch": 15.52, "learning_rate": 1.121805578229584e-05, "loss": 2.9916, "step": 2471500 }, { "epoch": 15.52, "learning_rate": 1.1210207080157226e-05, "loss": 2.9906, "step": 2472000 }, { "epoch": 15.52, "learning_rate": 1.1202358378018612e-05, "loss": 2.9985, "step": 2472500 }, { "epoch": 15.53, "learning_rate": 1.1194509675879997e-05, "loss": 2.9916, "step": 2473000 }, { "epoch": 15.53, "learning_rate": 1.1186660973741381e-05, "loss": 2.9901, "step": 2473500 }, { "epoch": 15.53, "learning_rate": 1.1178812271602768e-05, "loss": 2.9949, "step": 2474000 }, { "epoch": 15.54, "learning_rate": 1.1170963569464154e-05, "loss": 2.9892, "step": 2474500 }, { "epoch": 15.54, "learning_rate": 1.116311486732554e-05, "loss": 2.998, "step": 2475000 }, { "epoch": 15.54, "learning_rate": 1.1155266165186925e-05, "loss": 2.994, "step": 2475500 }, { "epoch": 15.55, "learning_rate": 1.1147417463048312e-05, "loss": 2.9944, "step": 2476000 }, { "epoch": 15.55, "learning_rate": 1.1139568760909697e-05, "loss": 2.993, "step": 2476500 }, { "epoch": 15.55, "learning_rate": 1.113173575617536e-05, "loss": 2.9883, "step": 2477000 }, { "epoch": 15.56, "learning_rate": 1.1123887054036746e-05, "loss": 2.9937, "step": 2477500 }, { "epoch": 15.56, "learning_rate": 1.1116038351898131e-05, "loss": 2.9903, "step": 2478000 }, { "epoch": 15.56, "learning_rate": 1.1108189649759516e-05, "loss": 2.9941, "step": 2478500 }, { "epoch": 15.57, "learning_rate": 1.1100340947620902e-05, "loss": 2.9934, "step": 2479000 }, { "epoch": 15.57, "learning_rate": 1.1092492245482287e-05, "loss": 2.993, "step": 2479500 }, { "epoch": 15.57, "learning_rate": 1.1084643543343673e-05, "loss": 2.9918, "step": 2480000 }, { "epoch": 15.57, "learning_rate": 1.1076826236013613e-05, "loss": 2.9976, "step": 2480500 }, { "epoch": 15.58, "learning_rate": 1.1068977533875e-05, "loss": 2.9919, "step": 2481000 }, { "epoch": 15.58, "learning_rate": 1.1061128831736386e-05, "loss": 2.9985, "step": 2481500 }, { "epoch": 15.58, "learning_rate": 1.105328012959777e-05, "loss": 2.9848, "step": 2482000 }, { "epoch": 15.59, "learning_rate": 1.1045431427459155e-05, "loss": 2.9972, "step": 2482500 }, { "epoch": 15.59, "learning_rate": 1.1037582725320542e-05, "loss": 2.9902, "step": 2483000 }, { "epoch": 15.59, "learning_rate": 1.1029734023181926e-05, "loss": 2.9975, "step": 2483500 }, { "epoch": 15.6, "learning_rate": 1.1021885321043313e-05, "loss": 2.9965, "step": 2484000 }, { "epoch": 15.6, "learning_rate": 1.1014036618904698e-05, "loss": 2.9884, "step": 2484500 }, { "epoch": 15.6, "learning_rate": 1.1006187916766084e-05, "loss": 2.9915, "step": 2485000 }, { "epoch": 15.61, "learning_rate": 1.0998354912031747e-05, "loss": 2.9878, "step": 2485500 }, { "epoch": 15.61, "learning_rate": 1.0990506209893132e-05, "loss": 2.9942, "step": 2486000 }, { "epoch": 15.61, "learning_rate": 1.0982657507754518e-05, "loss": 3.0007, "step": 2486500 }, { "epoch": 15.62, "learning_rate": 1.0974808805615903e-05, "loss": 2.9953, "step": 2487000 }, { "epoch": 15.62, "learning_rate": 1.096696010347729e-05, "loss": 2.9964, "step": 2487500 }, { "epoch": 15.62, "learning_rate": 1.0959127098742953e-05, "loss": 2.9939, "step": 2488000 }, { "epoch": 15.63, "learning_rate": 1.0951278396604337e-05, "loss": 2.9948, "step": 2488500 }, { "epoch": 15.63, "learning_rate": 1.0943429694465724e-05, "loss": 2.9867, "step": 2489000 }, { "epoch": 15.63, "learning_rate": 1.0935580992327108e-05, "loss": 2.9951, "step": 2489500 }, { "epoch": 15.63, "learning_rate": 1.0927732290188495e-05, "loss": 2.9921, "step": 2490000 }, { "epoch": 15.63, "eval_accuracy": 0.45983485547144615, "eval_loss": 2.9178946018218994, "eval_runtime": 1451.1565, "eval_samples_per_second": 57.748, "eval_steps_per_second": 5.775, "step": 2490000 }, { "epoch": 15.64, "learning_rate": 1.0919899285454158e-05, "loss": 2.9927, "step": 2490500 }, { "epoch": 15.64, "learning_rate": 1.0912050583315543e-05, "loss": 2.9908, "step": 2491000 }, { "epoch": 15.64, "learning_rate": 1.090420188117693e-05, "loss": 2.9898, "step": 2491500 }, { "epoch": 15.65, "learning_rate": 1.0896353179038314e-05, "loss": 2.989, "step": 2492000 }, { "epoch": 15.65, "learning_rate": 1.08885044768997e-05, "loss": 2.9917, "step": 2492500 }, { "epoch": 15.65, "learning_rate": 1.0880671472165363e-05, "loss": 2.9992, "step": 2493000 }, { "epoch": 15.66, "learning_rate": 1.0872822770026748e-05, "loss": 2.9894, "step": 2493500 }, { "epoch": 15.66, "learning_rate": 1.0864974067888135e-05, "loss": 2.999, "step": 2494000 }, { "epoch": 15.66, "learning_rate": 1.085712536574952e-05, "loss": 2.9932, "step": 2494500 }, { "epoch": 15.67, "learning_rate": 1.0849276663610906e-05, "loss": 2.9994, "step": 2495000 }, { "epoch": 15.67, "learning_rate": 1.0841427961472292e-05, "loss": 2.9956, "step": 2495500 }, { "epoch": 15.67, "learning_rate": 1.0833579259333677e-05, "loss": 2.998, "step": 2496000 }, { "epoch": 15.68, "learning_rate": 1.082574625459934e-05, "loss": 2.9901, "step": 2496500 }, { "epoch": 15.68, "learning_rate": 1.0817897552460726e-05, "loss": 2.9934, "step": 2497000 }, { "epoch": 15.68, "learning_rate": 1.0810048850322111e-05, "loss": 3.0019, "step": 2497500 }, { "epoch": 15.68, "learning_rate": 1.0802200148183498e-05, "loss": 2.9941, "step": 2498000 }, { "epoch": 15.69, "learning_rate": 1.0794367143449159e-05, "loss": 2.9951, "step": 2498500 }, { "epoch": 15.69, "learning_rate": 1.0786534138714822e-05, "loss": 2.991, "step": 2499000 }, { "epoch": 15.69, "learning_rate": 1.0778685436576208e-05, "loss": 2.9902, "step": 2499500 }, { "epoch": 15.7, "learning_rate": 1.0770836734437593e-05, "loss": 2.9974, "step": 2500000 }, { "epoch": 15.7, "learning_rate": 1.076298803229898e-05, "loss": 2.9945, "step": 2500500 }, { "epoch": 15.7, "learning_rate": 1.0755139330160364e-05, "loss": 2.9967, "step": 2501000 }, { "epoch": 15.71, "learning_rate": 1.074729062802175e-05, "loss": 2.9897, "step": 2501500 }, { "epoch": 15.71, "learning_rate": 1.0739441925883137e-05, "loss": 2.9928, "step": 2502000 }, { "epoch": 15.71, "learning_rate": 1.0731593223744522e-05, "loss": 2.9905, "step": 2502500 }, { "epoch": 15.72, "learning_rate": 1.0723744521605908e-05, "loss": 2.9955, "step": 2503000 }, { "epoch": 15.72, "learning_rate": 1.0715895819467293e-05, "loss": 2.9925, "step": 2503500 }, { "epoch": 15.72, "learning_rate": 1.070804711732868e-05, "loss": 2.9985, "step": 2504000 }, { "epoch": 15.73, "learning_rate": 1.0700198415190064e-05, "loss": 2.992, "step": 2504500 }, { "epoch": 15.73, "learning_rate": 1.0692365410455727e-05, "loss": 2.9916, "step": 2505000 }, { "epoch": 15.73, "learning_rate": 1.0684516708317114e-05, "loss": 2.9919, "step": 2505500 }, { "epoch": 15.74, "learning_rate": 1.0676668006178499e-05, "loss": 2.9919, "step": 2506000 }, { "epoch": 15.74, "learning_rate": 1.0668819304039885e-05, "loss": 2.9862, "step": 2506500 }, { "epoch": 15.74, "learning_rate": 1.066097060190127e-05, "loss": 2.9864, "step": 2507000 }, { "epoch": 15.74, "learning_rate": 1.0653121899762655e-05, "loss": 2.9943, "step": 2507500 }, { "epoch": 15.75, "learning_rate": 1.064528889502832e-05, "loss": 2.9893, "step": 2508000 }, { "epoch": 15.75, "learning_rate": 1.0637455890293982e-05, "loss": 2.9969, "step": 2508500 }, { "epoch": 15.75, "learning_rate": 1.0629607188155367e-05, "loss": 2.9927, "step": 2509000 }, { "epoch": 15.76, "learning_rate": 1.0621758486016754e-05, "loss": 2.9883, "step": 2509500 }, { "epoch": 15.76, "learning_rate": 1.0613909783878138e-05, "loss": 2.997, "step": 2510000 }, { "epoch": 15.76, "learning_rate": 1.0606061081739525e-05, "loss": 2.994, "step": 2510500 }, { "epoch": 15.77, "learning_rate": 1.059821237960091e-05, "loss": 2.9858, "step": 2511000 }, { "epoch": 15.77, "learning_rate": 1.0590379374866573e-05, "loss": 2.9923, "step": 2511500 }, { "epoch": 15.77, "learning_rate": 1.0582530672727959e-05, "loss": 2.9934, "step": 2512000 }, { "epoch": 15.78, "learning_rate": 1.0574681970589344e-05, "loss": 2.9929, "step": 2512500 }, { "epoch": 15.78, "learning_rate": 1.056683326845073e-05, "loss": 2.9953, "step": 2513000 }, { "epoch": 15.78, "learning_rate": 1.0558984566312115e-05, "loss": 2.9946, "step": 2513500 }, { "epoch": 15.79, "learning_rate": 1.05511358641735e-05, "loss": 2.9928, "step": 2514000 }, { "epoch": 15.79, "learning_rate": 1.0543287162034886e-05, "loss": 2.9941, "step": 2514500 }, { "epoch": 15.79, "learning_rate": 1.0535438459896272e-05, "loss": 2.9926, "step": 2515000 }, { "epoch": 15.79, "learning_rate": 1.0527589757757659e-05, "loss": 2.9934, "step": 2515500 }, { "epoch": 15.8, "learning_rate": 1.0519741055619044e-05, "loss": 2.9911, "step": 2516000 }, { "epoch": 15.8, "learning_rate": 1.0511908050884705e-05, "loss": 2.9947, "step": 2516500 }, { "epoch": 15.8, "learning_rate": 1.0504059348746093e-05, "loss": 2.9958, "step": 2517000 }, { "epoch": 15.81, "learning_rate": 1.0496210646607478e-05, "loss": 2.9946, "step": 2517500 }, { "epoch": 15.81, "learning_rate": 1.0488361944468864e-05, "loss": 3.0055, "step": 2518000 }, { "epoch": 15.81, "learning_rate": 1.0480528939734526e-05, "loss": 2.9947, "step": 2518500 }, { "epoch": 15.82, "learning_rate": 1.047268023759591e-05, "loss": 2.9912, "step": 2519000 }, { "epoch": 15.82, "learning_rate": 1.0464831535457299e-05, "loss": 2.99, "step": 2519500 }, { "epoch": 15.82, "learning_rate": 1.0456982833318683e-05, "loss": 2.9933, "step": 2520000 }, { "epoch": 15.82, "eval_accuracy": 0.46007843862939135, "eval_loss": 2.916163444519043, "eval_runtime": 1448.8416, "eval_samples_per_second": 57.84, "eval_steps_per_second": 5.785, "step": 2520000 }, { "epoch": 15.83, "learning_rate": 1.0449134131180068e-05, "loss": 2.9976, "step": 2520500 }, { "epoch": 15.83, "learning_rate": 1.0441285429041454e-05, "loss": 2.9898, "step": 2521000 }, { "epoch": 15.83, "learning_rate": 1.043343672690284e-05, "loss": 2.9928, "step": 2521500 }, { "epoch": 15.84, "learning_rate": 1.0425588024764226e-05, "loss": 2.991, "step": 2522000 }, { "epoch": 15.84, "learning_rate": 1.041773932262561e-05, "loss": 2.9955, "step": 2522500 }, { "epoch": 15.84, "learning_rate": 1.0409890620486997e-05, "loss": 2.9873, "step": 2523000 }, { "epoch": 15.84, "learning_rate": 1.0402041918348382e-05, "loss": 2.9975, "step": 2523500 }, { "epoch": 15.85, "learning_rate": 1.0394208913614045e-05, "loss": 3.0013, "step": 2524000 }, { "epoch": 15.85, "learning_rate": 1.0386360211475431e-05, "loss": 2.9901, "step": 2524500 }, { "epoch": 15.85, "learning_rate": 1.0378511509336816e-05, "loss": 2.9866, "step": 2525000 }, { "epoch": 15.86, "learning_rate": 1.0370662807198202e-05, "loss": 2.994, "step": 2525500 }, { "epoch": 15.86, "learning_rate": 1.0362814105059587e-05, "loss": 2.9863, "step": 2526000 }, { "epoch": 15.86, "learning_rate": 1.035498110032525e-05, "loss": 2.9848, "step": 2526500 }, { "epoch": 15.87, "learning_rate": 1.0347132398186637e-05, "loss": 2.9918, "step": 2527000 }, { "epoch": 15.87, "learning_rate": 1.03392993934523e-05, "loss": 2.9912, "step": 2527500 }, { "epoch": 15.87, "learning_rate": 1.0331450691313684e-05, "loss": 2.9894, "step": 2528000 }, { "epoch": 15.88, "learning_rate": 1.032360198917507e-05, "loss": 2.992, "step": 2528500 }, { "epoch": 15.88, "learning_rate": 1.0315753287036455e-05, "loss": 2.9904, "step": 2529000 }, { "epoch": 15.88, "learning_rate": 1.0307904584897842e-05, "loss": 2.9895, "step": 2529500 }, { "epoch": 15.89, "learning_rate": 1.0300071580163505e-05, "loss": 2.9974, "step": 2530000 }, { "epoch": 15.89, "learning_rate": 1.029222287802489e-05, "loss": 2.9892, "step": 2530500 }, { "epoch": 15.89, "learning_rate": 1.0284374175886276e-05, "loss": 2.993, "step": 2531000 }, { "epoch": 15.9, "learning_rate": 1.0276525473747661e-05, "loss": 3.0028, "step": 2531500 }, { "epoch": 15.9, "learning_rate": 1.0268676771609047e-05, "loss": 2.9915, "step": 2532000 }, { "epoch": 15.9, "learning_rate": 1.026084376687471e-05, "loss": 2.9954, "step": 2532500 }, { "epoch": 15.9, "learning_rate": 1.0252995064736095e-05, "loss": 2.9905, "step": 2533000 }, { "epoch": 15.91, "learning_rate": 1.0245146362597482e-05, "loss": 2.9938, "step": 2533500 }, { "epoch": 15.91, "learning_rate": 1.0237297660458866e-05, "loss": 2.9969, "step": 2534000 }, { "epoch": 15.91, "learning_rate": 1.0229448958320253e-05, "loss": 2.9903, "step": 2534500 }, { "epoch": 15.92, "learning_rate": 1.0221600256181638e-05, "loss": 2.9978, "step": 2535000 }, { "epoch": 15.92, "learning_rate": 1.02137672514473e-05, "loss": 2.986, "step": 2535500 }, { "epoch": 15.92, "learning_rate": 1.0205918549308687e-05, "loss": 2.9918, "step": 2536000 }, { "epoch": 15.93, "learning_rate": 1.0198069847170072e-05, "loss": 2.993, "step": 2536500 }, { "epoch": 15.93, "learning_rate": 1.0190221145031458e-05, "loss": 2.9867, "step": 2537000 }, { "epoch": 15.93, "learning_rate": 1.0182372442892845e-05, "loss": 2.9892, "step": 2537500 }, { "epoch": 15.94, "learning_rate": 1.0174539438158506e-05, "loss": 2.9957, "step": 2538000 }, { "epoch": 15.94, "learning_rate": 1.0166690736019892e-05, "loss": 2.9968, "step": 2538500 }, { "epoch": 15.94, "learning_rate": 1.0158842033881277e-05, "loss": 2.9949, "step": 2539000 }, { "epoch": 15.95, "learning_rate": 1.0150993331742664e-05, "loss": 2.9887, "step": 2539500 }, { "epoch": 15.95, "learning_rate": 1.014314462960405e-05, "loss": 2.9925, "step": 2540000 }, { "epoch": 15.95, "learning_rate": 1.0135295927465435e-05, "loss": 2.9926, "step": 2540500 }, { "epoch": 15.95, "learning_rate": 1.0127462922731098e-05, "loss": 2.9891, "step": 2541000 }, { "epoch": 15.96, "learning_rate": 1.0119614220592483e-05, "loss": 2.9921, "step": 2541500 }, { "epoch": 15.96, "learning_rate": 1.0111765518453869e-05, "loss": 2.9953, "step": 2542000 }, { "epoch": 15.96, "learning_rate": 1.0103916816315255e-05, "loss": 2.9905, "step": 2542500 }, { "epoch": 15.97, "learning_rate": 1.009606811417664e-05, "loss": 2.9978, "step": 2543000 }, { "epoch": 15.97, "learning_rate": 1.0088219412038027e-05, "loss": 2.9943, "step": 2543500 }, { "epoch": 15.97, "learning_rate": 1.008038640730369e-05, "loss": 2.9927, "step": 2544000 }, { "epoch": 15.98, "learning_rate": 1.0072537705165074e-05, "loss": 2.9948, "step": 2544500 }, { "epoch": 15.98, "learning_rate": 1.0064689003026461e-05, "loss": 2.9976, "step": 2545000 }, { "epoch": 15.98, "learning_rate": 1.0056840300887846e-05, "loss": 2.9912, "step": 2545500 }, { "epoch": 15.99, "learning_rate": 1.0048991598749232e-05, "loss": 2.99, "step": 2546000 }, { "epoch": 15.99, "learning_rate": 1.0041142896610617e-05, "loss": 2.9948, "step": 2546500 }, { "epoch": 15.99, "learning_rate": 1.0033294194472003e-05, "loss": 2.988, "step": 2547000 }, { "epoch": 16.0, "learning_rate": 1.0025445492333388e-05, "loss": 2.9954, "step": 2547500 }, { "epoch": 16.0, "learning_rate": 1.0017596790194773e-05, "loss": 2.9989, "step": 2548000 }, { "epoch": 16.0, "learning_rate": 1.0009748088056159e-05, "loss": 2.987, "step": 2548500 }, { "epoch": 16.01, "learning_rate": 1.0001899385917546e-05, "loss": 2.9777, "step": 2549000 }, { "epoch": 16.01, "learning_rate": 9.994066381183207e-06, "loss": 2.9749, "step": 2549500 }, { "epoch": 16.01, "learning_rate": 9.986217679044593e-06, "loss": 2.9739, "step": 2550000 }, { "epoch": 16.01, "eval_accuracy": 0.46034874571295914, "eval_loss": 2.9151387214660645, "eval_runtime": 1451.7391, "eval_samples_per_second": 57.725, "eval_steps_per_second": 5.773, "step": 2550000 }, { "epoch": 16.01, "learning_rate": 9.978368976905978e-06, "loss": 2.9735, "step": 2550500 }, { "epoch": 16.02, "learning_rate": 9.970520274767365e-06, "loss": 2.983, "step": 2551000 }, { "epoch": 16.02, "learning_rate": 9.962687270033028e-06, "loss": 2.9821, "step": 2551500 }, { "epoch": 16.02, "learning_rate": 9.954838567894412e-06, "loss": 2.9807, "step": 2552000 }, { "epoch": 16.03, "learning_rate": 9.946989865755799e-06, "loss": 2.9807, "step": 2552500 }, { "epoch": 16.03, "learning_rate": 9.939141163617184e-06, "loss": 2.9752, "step": 2553000 }, { "epoch": 16.03, "learning_rate": 9.931292461478572e-06, "loss": 2.9849, "step": 2553500 }, { "epoch": 16.04, "learning_rate": 9.923459456744233e-06, "loss": 2.9779, "step": 2554000 }, { "epoch": 16.04, "learning_rate": 9.915610754605618e-06, "loss": 2.9801, "step": 2554500 }, { "epoch": 16.04, "learning_rate": 9.907762052467004e-06, "loss": 2.9787, "step": 2555000 }, { "epoch": 16.05, "learning_rate": 9.89991335032839e-06, "loss": 2.9739, "step": 2555500 }, { "epoch": 16.05, "learning_rate": 9.892064648189777e-06, "loss": 2.9802, "step": 2556000 }, { "epoch": 16.05, "learning_rate": 9.884231643455438e-06, "loss": 2.988, "step": 2556500 }, { "epoch": 16.06, "learning_rate": 9.876382941316823e-06, "loss": 2.9783, "step": 2557000 }, { "epoch": 16.06, "learning_rate": 9.86853423917821e-06, "loss": 2.9773, "step": 2557500 }, { "epoch": 16.06, "learning_rate": 9.860685537039596e-06, "loss": 2.9788, "step": 2558000 }, { "epoch": 16.06, "learning_rate": 9.852836834900983e-06, "loss": 2.9793, "step": 2558500 }, { "epoch": 16.07, "learning_rate": 9.844988132762367e-06, "loss": 2.9747, "step": 2559000 }, { "epoch": 16.07, "learning_rate": 9.837139430623752e-06, "loss": 2.9788, "step": 2559500 }, { "epoch": 16.07, "learning_rate": 9.829306425889417e-06, "loss": 2.983, "step": 2560000 }, { "epoch": 16.08, "learning_rate": 9.821457723750801e-06, "loss": 2.982, "step": 2560500 }, { "epoch": 16.08, "learning_rate": 9.813609021612186e-06, "loss": 2.979, "step": 2561000 }, { "epoch": 16.08, "learning_rate": 9.805760319473573e-06, "loss": 2.9784, "step": 2561500 }, { "epoch": 16.09, "learning_rate": 9.797927314739236e-06, "loss": 2.9853, "step": 2562000 }, { "epoch": 16.09, "learning_rate": 9.790078612600622e-06, "loss": 2.9816, "step": 2562500 }, { "epoch": 16.09, "learning_rate": 9.782229910462007e-06, "loss": 2.9758, "step": 2563000 }, { "epoch": 16.1, "learning_rate": 9.774381208323392e-06, "loss": 2.9816, "step": 2563500 }, { "epoch": 16.1, "learning_rate": 9.766532506184778e-06, "loss": 2.9885, "step": 2564000 }, { "epoch": 16.1, "learning_rate": 9.758683804046163e-06, "loss": 2.9761, "step": 2564500 }, { "epoch": 16.11, "learning_rate": 9.75083510190755e-06, "loss": 2.9844, "step": 2565000 }, { "epoch": 16.11, "learning_rate": 9.742986399768934e-06, "loss": 2.98, "step": 2565500 }, { "epoch": 16.11, "learning_rate": 9.735153395034597e-06, "loss": 2.984, "step": 2566000 }, { "epoch": 16.11, "learning_rate": 9.727320390300262e-06, "loss": 2.9749, "step": 2566500 }, { "epoch": 16.12, "learning_rate": 9.719471688161647e-06, "loss": 2.982, "step": 2567000 }, { "epoch": 16.12, "learning_rate": 9.711622986023031e-06, "loss": 2.9849, "step": 2567500 }, { "epoch": 16.12, "learning_rate": 9.703774283884418e-06, "loss": 2.9796, "step": 2568000 }, { "epoch": 16.13, "learning_rate": 9.695925581745802e-06, "loss": 2.9887, "step": 2568500 }, { "epoch": 16.13, "learning_rate": 9.688076879607189e-06, "loss": 2.9804, "step": 2569000 }, { "epoch": 16.13, "learning_rate": 9.680228177468574e-06, "loss": 2.984, "step": 2569500 }, { "epoch": 16.14, "learning_rate": 9.672395172734237e-06, "loss": 2.9793, "step": 2570000 }, { "epoch": 16.14, "learning_rate": 9.664546470595623e-06, "loss": 2.986, "step": 2570500 }, { "epoch": 16.14, "learning_rate": 9.656697768457008e-06, "loss": 2.9818, "step": 2571000 }, { "epoch": 16.15, "learning_rate": 9.648849066318394e-06, "loss": 2.9829, "step": 2571500 }, { "epoch": 16.15, "learning_rate": 9.641000364179779e-06, "loss": 2.9803, "step": 2572000 }, { "epoch": 16.15, "learning_rate": 9.633167359445442e-06, "loss": 2.9824, "step": 2572500 }, { "epoch": 16.16, "learning_rate": 9.625334354711107e-06, "loss": 2.978, "step": 2573000 }, { "epoch": 16.16, "learning_rate": 9.617485652572492e-06, "loss": 2.988, "step": 2573500 }, { "epoch": 16.16, "learning_rate": 9.609636950433876e-06, "loss": 2.9845, "step": 2574000 }, { "epoch": 16.17, "learning_rate": 9.601788248295263e-06, "loss": 2.9869, "step": 2574500 }, { "epoch": 16.17, "learning_rate": 9.593939546156648e-06, "loss": 2.9857, "step": 2575000 }, { "epoch": 16.17, "learning_rate": 9.58610654142231e-06, "loss": 2.9885, "step": 2575500 }, { "epoch": 16.17, "learning_rate": 9.578257839283697e-06, "loss": 2.9891, "step": 2576000 }, { "epoch": 16.18, "learning_rate": 9.570409137145082e-06, "loss": 2.9708, "step": 2576500 }, { "epoch": 16.18, "learning_rate": 9.562560435006468e-06, "loss": 2.984, "step": 2577000 }, { "epoch": 16.18, "learning_rate": 9.554711732867853e-06, "loss": 2.9776, "step": 2577500 }, { "epoch": 16.19, "learning_rate": 9.54686303072924e-06, "loss": 2.9882, "step": 2578000 }, { "epoch": 16.19, "learning_rate": 9.539014328590624e-06, "loss": 2.988, "step": 2578500 }, { "epoch": 16.19, "learning_rate": 9.53116562645201e-06, "loss": 2.978, "step": 2579000 }, { "epoch": 16.2, "learning_rate": 9.523316924313395e-06, "loss": 2.9823, "step": 2579500 }, { "epoch": 16.2, "learning_rate": 9.515468222174782e-06, "loss": 2.9806, "step": 2580000 }, { "epoch": 16.2, "eval_accuracy": 0.46043968404737834, "eval_loss": 2.9136369228363037, "eval_runtime": 1450.735, "eval_samples_per_second": 57.765, "eval_steps_per_second": 5.777, "step": 2580000 }, { "epoch": 16.2, "learning_rate": 9.507635217440445e-06, "loss": 2.9848, "step": 2580500 }, { "epoch": 16.21, "learning_rate": 9.49978651530183e-06, "loss": 2.9767, "step": 2581000 }, { "epoch": 16.21, "learning_rate": 9.491937813163216e-06, "loss": 2.9746, "step": 2581500 }, { "epoch": 16.21, "learning_rate": 9.4840891110246e-06, "loss": 2.9822, "step": 2582000 }, { "epoch": 16.22, "learning_rate": 9.476240408885987e-06, "loss": 2.9836, "step": 2582500 }, { "epoch": 16.22, "learning_rate": 9.468391706747374e-06, "loss": 2.9834, "step": 2583000 }, { "epoch": 16.22, "learning_rate": 9.460543004608758e-06, "loss": 2.9801, "step": 2583500 }, { "epoch": 16.22, "learning_rate": 9.452694302470145e-06, "loss": 2.9806, "step": 2584000 }, { "epoch": 16.23, "learning_rate": 9.444861297735808e-06, "loss": 2.9876, "step": 2584500 }, { "epoch": 16.23, "learning_rate": 9.43702829300147e-06, "loss": 2.9803, "step": 2585000 }, { "epoch": 16.23, "learning_rate": 9.429179590862856e-06, "loss": 2.9785, "step": 2585500 }, { "epoch": 16.24, "learning_rate": 9.42133088872424e-06, "loss": 2.9874, "step": 2586000 }, { "epoch": 16.24, "learning_rate": 9.413482186585625e-06, "loss": 2.9808, "step": 2586500 }, { "epoch": 16.24, "learning_rate": 9.405633484447013e-06, "loss": 2.981, "step": 2587000 }, { "epoch": 16.25, "learning_rate": 9.397800479712675e-06, "loss": 2.981, "step": 2587500 }, { "epoch": 16.25, "learning_rate": 9.389967474978338e-06, "loss": 2.9846, "step": 2588000 }, { "epoch": 16.25, "learning_rate": 9.382118772839724e-06, "loss": 2.9853, "step": 2588500 }, { "epoch": 16.26, "learning_rate": 9.374270070701109e-06, "loss": 2.9871, "step": 2589000 }, { "epoch": 16.26, "learning_rate": 9.366421368562495e-06, "loss": 2.9809, "step": 2589500 }, { "epoch": 16.26, "learning_rate": 9.35857266642388e-06, "loss": 2.9842, "step": 2590000 }, { "epoch": 16.27, "learning_rate": 9.350723964285265e-06, "loss": 2.9844, "step": 2590500 }, { "epoch": 16.27, "learning_rate": 9.342875262146653e-06, "loss": 2.9799, "step": 2591000 }, { "epoch": 16.27, "learning_rate": 9.335026560008038e-06, "loss": 2.9859, "step": 2591500 }, { "epoch": 16.28, "learning_rate": 9.3271935552737e-06, "loss": 2.9805, "step": 2592000 }, { "epoch": 16.28, "learning_rate": 9.319344853135085e-06, "loss": 2.9841, "step": 2592500 }, { "epoch": 16.28, "learning_rate": 9.31149615099647e-06, "loss": 2.979, "step": 2593000 }, { "epoch": 16.28, "learning_rate": 9.303647448857858e-06, "loss": 2.9811, "step": 2593500 }, { "epoch": 16.29, "learning_rate": 9.295798746719243e-06, "loss": 2.9834, "step": 2594000 }, { "epoch": 16.29, "learning_rate": 9.28795004458063e-06, "loss": 2.9792, "step": 2594500 }, { "epoch": 16.29, "learning_rate": 9.280101342442014e-06, "loss": 2.9843, "step": 2595000 }, { "epoch": 16.3, "learning_rate": 9.2722526403034e-06, "loss": 2.9908, "step": 2595500 }, { "epoch": 16.3, "learning_rate": 9.264419635569064e-06, "loss": 2.9869, "step": 2596000 }, { "epoch": 16.3, "learning_rate": 9.256570933430449e-06, "loss": 2.9789, "step": 2596500 }, { "epoch": 16.31, "learning_rate": 9.248722231291835e-06, "loss": 2.9855, "step": 2597000 }, { "epoch": 16.31, "learning_rate": 9.24087352915322e-06, "loss": 2.9759, "step": 2597500 }, { "epoch": 16.31, "learning_rate": 9.233024827014604e-06, "loss": 2.9887, "step": 2598000 }, { "epoch": 16.32, "learning_rate": 9.225176124875991e-06, "loss": 2.9875, "step": 2598500 }, { "epoch": 16.32, "learning_rate": 9.217343120141654e-06, "loss": 2.9823, "step": 2599000 }, { "epoch": 16.32, "learning_rate": 9.209510115407315e-06, "loss": 2.9882, "step": 2599500 }, { "epoch": 16.33, "learning_rate": 9.201661413268703e-06, "loss": 2.9866, "step": 2600000 }, { "epoch": 16.33, "learning_rate": 9.193812711130088e-06, "loss": 2.9889, "step": 2600500 }, { "epoch": 16.33, "learning_rate": 9.185964008991475e-06, "loss": 2.9832, "step": 2601000 }, { "epoch": 16.33, "learning_rate": 9.17811530685286e-06, "loss": 2.9887, "step": 2601500 }, { "epoch": 16.34, "learning_rate": 9.170266604714244e-06, "loss": 2.9903, "step": 2602000 }, { "epoch": 16.34, "learning_rate": 9.16241790257563e-06, "loss": 2.9792, "step": 2602500 }, { "epoch": 16.34, "learning_rate": 9.154569200437015e-06, "loss": 2.9865, "step": 2603000 }, { "epoch": 16.35, "learning_rate": 9.14673619570268e-06, "loss": 2.9945, "step": 2603500 }, { "epoch": 16.35, "learning_rate": 9.138887493564065e-06, "loss": 2.9865, "step": 2604000 }, { "epoch": 16.35, "learning_rate": 9.13103879142545e-06, "loss": 2.9874, "step": 2604500 }, { "epoch": 16.36, "learning_rate": 9.123205786691114e-06, "loss": 2.9831, "step": 2605000 }, { "epoch": 16.36, "learning_rate": 9.115357084552499e-06, "loss": 2.975, "step": 2605500 }, { "epoch": 16.36, "learning_rate": 9.107508382413884e-06, "loss": 2.9826, "step": 2606000 }, { "epoch": 16.37, "learning_rate": 9.09965968027527e-06, "loss": 2.9882, "step": 2606500 }, { "epoch": 16.37, "learning_rate": 9.091810978136655e-06, "loss": 2.9868, "step": 2607000 }, { "epoch": 16.37, "learning_rate": 9.083962275998041e-06, "loss": 2.9843, "step": 2607500 }, { "epoch": 16.38, "learning_rate": 9.076113573859426e-06, "loss": 2.9827, "step": 2608000 }, { "epoch": 16.38, "learning_rate": 9.068264871720813e-06, "loss": 2.9845, "step": 2608500 }, { "epoch": 16.38, "learning_rate": 9.060416169582197e-06, "loss": 2.99, "step": 2609000 }, { "epoch": 16.38, "learning_rate": 9.052567467443584e-06, "loss": 2.9848, "step": 2609500 }, { "epoch": 16.39, "learning_rate": 9.04471876530497e-06, "loss": 2.9818, "step": 2610000 }, { "epoch": 16.39, "eval_accuracy": 0.4606046351744975, "eval_loss": 2.9124929904937744, "eval_runtime": 1450.2407, "eval_samples_per_second": 57.784, "eval_steps_per_second": 5.779, "step": 2610000 }, { "epoch": 16.39, "learning_rate": 9.036870063166355e-06, "loss": 2.9772, "step": 2610500 }, { "epoch": 16.39, "learning_rate": 9.029021361027741e-06, "loss": 2.9846, "step": 2611000 }, { "epoch": 16.4, "learning_rate": 9.021188356293404e-06, "loss": 2.9878, "step": 2611500 }, { "epoch": 16.4, "learning_rate": 9.013339654154789e-06, "loss": 2.9764, "step": 2612000 }, { "epoch": 16.4, "learning_rate": 9.005490952016176e-06, "loss": 2.984, "step": 2612500 }, { "epoch": 16.41, "learning_rate": 8.99764224987756e-06, "loss": 2.9857, "step": 2613000 }, { "epoch": 16.41, "learning_rate": 8.989793547738947e-06, "loss": 2.9838, "step": 2613500 }, { "epoch": 16.41, "learning_rate": 8.981944845600332e-06, "loss": 2.9874, "step": 2614000 }, { "epoch": 16.42, "learning_rate": 8.974111840865995e-06, "loss": 2.9812, "step": 2614500 }, { "epoch": 16.42, "learning_rate": 8.966263138727381e-06, "loss": 2.988, "step": 2615000 }, { "epoch": 16.42, "learning_rate": 8.958414436588766e-06, "loss": 2.979, "step": 2615500 }, { "epoch": 16.43, "learning_rate": 8.950565734450152e-06, "loss": 2.9807, "step": 2616000 }, { "epoch": 16.43, "learning_rate": 8.942717032311537e-06, "loss": 2.9796, "step": 2616500 }, { "epoch": 16.43, "learning_rate": 8.934868330172923e-06, "loss": 2.9809, "step": 2617000 }, { "epoch": 16.44, "learning_rate": 8.927019628034308e-06, "loss": 2.9852, "step": 2617500 }, { "epoch": 16.44, "learning_rate": 8.919186623299971e-06, "loss": 2.9946, "step": 2618000 }, { "epoch": 16.44, "learning_rate": 8.911337921161358e-06, "loss": 2.9827, "step": 2618500 }, { "epoch": 16.44, "learning_rate": 8.903489219022742e-06, "loss": 2.9855, "step": 2619000 }, { "epoch": 16.45, "learning_rate": 8.895656214288405e-06, "loss": 2.9781, "step": 2619500 }, { "epoch": 16.45, "learning_rate": 8.887807512149792e-06, "loss": 2.9825, "step": 2620000 }, { "epoch": 16.45, "learning_rate": 8.879958810011177e-06, "loss": 2.9843, "step": 2620500 }, { "epoch": 16.46, "learning_rate": 8.872110107872563e-06, "loss": 2.9865, "step": 2621000 }, { "epoch": 16.46, "learning_rate": 8.864261405733948e-06, "loss": 2.9812, "step": 2621500 }, { "epoch": 16.46, "learning_rate": 8.85642840099961e-06, "loss": 2.9789, "step": 2622000 }, { "epoch": 16.47, "learning_rate": 8.848579698860997e-06, "loss": 2.9809, "step": 2622500 }, { "epoch": 16.47, "learning_rate": 8.840730996722382e-06, "loss": 2.9862, "step": 2623000 }, { "epoch": 16.47, "learning_rate": 8.832882294583768e-06, "loss": 2.9849, "step": 2623500 }, { "epoch": 16.48, "learning_rate": 8.825033592445153e-06, "loss": 2.9796, "step": 2624000 }, { "epoch": 16.48, "learning_rate": 8.81718489030654e-06, "loss": 2.9803, "step": 2624500 }, { "epoch": 16.48, "learning_rate": 8.809336188167926e-06, "loss": 2.9888, "step": 2625000 }, { "epoch": 16.49, "learning_rate": 8.80148748602931e-06, "loss": 2.9854, "step": 2625500 }, { "epoch": 16.49, "learning_rate": 8.793638783890697e-06, "loss": 2.9908, "step": 2626000 }, { "epoch": 16.49, "learning_rate": 8.785790081752082e-06, "loss": 2.9777, "step": 2626500 }, { "epoch": 16.49, "learning_rate": 8.777957077017743e-06, "loss": 2.9804, "step": 2627000 }, { "epoch": 16.5, "learning_rate": 8.770108374879131e-06, "loss": 2.9774, "step": 2627500 }, { "epoch": 16.5, "learning_rate": 8.762259672740516e-06, "loss": 2.9782, "step": 2628000 }, { "epoch": 16.5, "learning_rate": 8.754410970601903e-06, "loss": 2.982, "step": 2628500 }, { "epoch": 16.51, "learning_rate": 8.746562268463287e-06, "loss": 2.9828, "step": 2629000 }, { "epoch": 16.51, "learning_rate": 8.738713566324674e-06, "loss": 2.9889, "step": 2629500 }, { "epoch": 16.51, "learning_rate": 8.730864864186059e-06, "loss": 2.9886, "step": 2630000 }, { "epoch": 16.52, "learning_rate": 8.723031859451722e-06, "loss": 2.9779, "step": 2630500 }, { "epoch": 16.52, "learning_rate": 8.715183157313108e-06, "loss": 2.9824, "step": 2631000 }, { "epoch": 16.52, "learning_rate": 8.707334455174493e-06, "loss": 2.9876, "step": 2631500 }, { "epoch": 16.53, "learning_rate": 8.699485753035878e-06, "loss": 2.982, "step": 2632000 }, { "epoch": 16.53, "learning_rate": 8.691637050897264e-06, "loss": 2.9809, "step": 2632500 }, { "epoch": 16.53, "learning_rate": 8.683788348758649e-06, "loss": 2.9812, "step": 2633000 }, { "epoch": 16.54, "learning_rate": 8.675939646620035e-06, "loss": 2.9873, "step": 2633500 }, { "epoch": 16.54, "learning_rate": 8.66809094448142e-06, "loss": 2.9867, "step": 2634000 }, { "epoch": 16.54, "learning_rate": 8.660257939747083e-06, "loss": 2.9881, "step": 2634500 }, { "epoch": 16.55, "learning_rate": 8.65240923760847e-06, "loss": 2.9811, "step": 2635000 }, { "epoch": 16.55, "learning_rate": 8.644560535469854e-06, "loss": 2.9828, "step": 2635500 }, { "epoch": 16.55, "learning_rate": 8.63671183333124e-06, "loss": 2.9869, "step": 2636000 }, { "epoch": 16.55, "learning_rate": 8.628863131192625e-06, "loss": 2.9865, "step": 2636500 }, { "epoch": 16.56, "learning_rate": 8.621014429054013e-06, "loss": 2.9778, "step": 2637000 }, { "epoch": 16.56, "learning_rate": 8.613197121723953e-06, "loss": 2.9824, "step": 2637500 }, { "epoch": 16.56, "learning_rate": 8.605348419585338e-06, "loss": 2.9868, "step": 2638000 }, { "epoch": 16.57, "learning_rate": 8.597499717446723e-06, "loss": 2.9827, "step": 2638500 }, { "epoch": 16.57, "learning_rate": 8.589651015308109e-06, "loss": 2.9852, "step": 2639000 }, { "epoch": 16.57, "learning_rate": 8.581802313169494e-06, "loss": 2.9839, "step": 2639500 }, { "epoch": 16.58, "learning_rate": 8.57395361103088e-06, "loss": 2.9897, "step": 2640000 }, { "epoch": 16.58, "eval_accuracy": 0.4608634991454351, "eval_loss": 2.911041021347046, "eval_runtime": 1449.8893, "eval_samples_per_second": 57.798, "eval_steps_per_second": 5.78, "step": 2640000 }, { "epoch": 16.58, "learning_rate": 8.566104908892265e-06, "loss": 2.9857, "step": 2640500 }, { "epoch": 16.58, "learning_rate": 8.558256206753653e-06, "loss": 2.9867, "step": 2641000 }, { "epoch": 16.59, "learning_rate": 8.550407504615038e-06, "loss": 2.9874, "step": 2641500 }, { "epoch": 16.59, "learning_rate": 8.542558802476423e-06, "loss": 2.9742, "step": 2642000 }, { "epoch": 16.59, "learning_rate": 8.534710100337809e-06, "loss": 2.9852, "step": 2642500 }, { "epoch": 16.6, "learning_rate": 8.526861398199194e-06, "loss": 2.9842, "step": 2643000 }, { "epoch": 16.6, "learning_rate": 8.51901269606058e-06, "loss": 2.9824, "step": 2643500 }, { "epoch": 16.6, "learning_rate": 8.511211086134798e-06, "loss": 2.979, "step": 2644000 }, { "epoch": 16.6, "learning_rate": 8.503362383996183e-06, "loss": 2.9851, "step": 2644500 }, { "epoch": 16.61, "learning_rate": 8.495513681857568e-06, "loss": 2.9904, "step": 2645000 }, { "epoch": 16.61, "learning_rate": 8.487664979718954e-06, "loss": 2.9856, "step": 2645500 }, { "epoch": 16.61, "learning_rate": 8.479831974984617e-06, "loss": 2.9846, "step": 2646000 }, { "epoch": 16.62, "learning_rate": 8.471983272846002e-06, "loss": 2.9878, "step": 2646500 }, { "epoch": 16.62, "learning_rate": 8.464134570707388e-06, "loss": 2.988, "step": 2647000 }, { "epoch": 16.62, "learning_rate": 8.456285868568773e-06, "loss": 2.9812, "step": 2647500 }, { "epoch": 16.63, "learning_rate": 8.44843716643016e-06, "loss": 2.9818, "step": 2648000 }, { "epoch": 16.63, "learning_rate": 8.440588464291544e-06, "loss": 2.9943, "step": 2648500 }, { "epoch": 16.63, "learning_rate": 8.43273976215293e-06, "loss": 2.98, "step": 2649000 }, { "epoch": 16.64, "learning_rate": 8.424891060014315e-06, "loss": 2.9786, "step": 2649500 }, { "epoch": 16.64, "learning_rate": 8.417042357875702e-06, "loss": 2.9764, "step": 2650000 }, { "epoch": 16.64, "learning_rate": 8.409193655737088e-06, "loss": 2.9862, "step": 2650500 }, { "epoch": 16.65, "learning_rate": 8.401344953598473e-06, "loss": 2.9897, "step": 2651000 }, { "epoch": 16.65, "learning_rate": 8.39349625145986e-06, "loss": 2.9836, "step": 2651500 }, { "epoch": 16.65, "learning_rate": 8.385647549321244e-06, "loss": 2.9869, "step": 2652000 }, { "epoch": 16.65, "learning_rate": 8.37779884718263e-06, "loss": 2.9776, "step": 2652500 }, { "epoch": 16.66, "learning_rate": 8.369950145044015e-06, "loss": 2.9815, "step": 2653000 }, { "epoch": 16.66, "learning_rate": 8.362101442905402e-06, "loss": 2.9862, "step": 2653500 }, { "epoch": 16.66, "learning_rate": 8.354284135575342e-06, "loss": 2.9799, "step": 2654000 }, { "epoch": 16.67, "learning_rate": 8.346435433436728e-06, "loss": 2.9867, "step": 2654500 }, { "epoch": 16.67, "learning_rate": 8.338586731298113e-06, "loss": 2.9903, "step": 2655000 }, { "epoch": 16.67, "learning_rate": 8.3307380291595e-06, "loss": 2.9842, "step": 2655500 }, { "epoch": 16.68, "learning_rate": 8.322889327020884e-06, "loss": 2.9875, "step": 2656000 }, { "epoch": 16.68, "learning_rate": 8.315056322286547e-06, "loss": 2.9857, "step": 2656500 }, { "epoch": 16.68, "learning_rate": 8.307207620147933e-06, "loss": 2.9936, "step": 2657000 }, { "epoch": 16.69, "learning_rate": 8.299374615413595e-06, "loss": 2.9781, "step": 2657500 }, { "epoch": 16.69, "learning_rate": 8.291525913274981e-06, "loss": 2.9784, "step": 2658000 }, { "epoch": 16.69, "learning_rate": 8.283677211136368e-06, "loss": 2.9831, "step": 2658500 }, { "epoch": 16.7, "learning_rate": 8.275828508997752e-06, "loss": 2.9918, "step": 2659000 }, { "epoch": 16.7, "learning_rate": 8.267979806859139e-06, "loss": 2.9857, "step": 2659500 }, { "epoch": 16.7, "learning_rate": 8.260131104720524e-06, "loss": 2.9833, "step": 2660000 }, { "epoch": 16.71, "learning_rate": 8.25228240258191e-06, "loss": 2.9868, "step": 2660500 }, { "epoch": 16.71, "learning_rate": 8.244433700443295e-06, "loss": 2.9882, "step": 2661000 }, { "epoch": 16.71, "learning_rate": 8.236600695708958e-06, "loss": 2.977, "step": 2661500 }, { "epoch": 16.71, "learning_rate": 8.228751993570344e-06, "loss": 2.9773, "step": 2662000 }, { "epoch": 16.72, "learning_rate": 8.220903291431729e-06, "loss": 2.9779, "step": 2662500 }, { "epoch": 16.72, "learning_rate": 8.213054589293115e-06, "loss": 2.9861, "step": 2663000 }, { "epoch": 16.72, "learning_rate": 8.2052058871545e-06, "loss": 2.986, "step": 2663500 }, { "epoch": 16.73, "learning_rate": 8.197357185015887e-06, "loss": 2.9874, "step": 2664000 }, { "epoch": 16.73, "learning_rate": 8.189508482877271e-06, "loss": 2.9937, "step": 2664500 }, { "epoch": 16.73, "learning_rate": 8.181659780738656e-06, "loss": 2.9836, "step": 2665000 }, { "epoch": 16.74, "learning_rate": 8.173811078600043e-06, "loss": 2.9851, "step": 2665500 }, { "epoch": 16.74, "learning_rate": 8.165978073865706e-06, "loss": 2.982, "step": 2666000 }, { "epoch": 16.74, "learning_rate": 8.158129371727092e-06, "loss": 2.989, "step": 2666500 }, { "epoch": 16.75, "learning_rate": 8.150280669588477e-06, "loss": 2.9775, "step": 2667000 }, { "epoch": 16.75, "learning_rate": 8.142431967449862e-06, "loss": 2.9878, "step": 2667500 }, { "epoch": 16.75, "learning_rate": 8.13458326531125e-06, "loss": 2.9843, "step": 2668000 }, { "epoch": 16.76, "learning_rate": 8.126734563172634e-06, "loss": 2.9816, "step": 2668500 }, { "epoch": 16.76, "learning_rate": 8.11888586103402e-06, "loss": 2.9798, "step": 2669000 }, { "epoch": 16.76, "learning_rate": 8.111037158895406e-06, "loss": 2.9854, "step": 2669500 }, { "epoch": 16.76, "learning_rate": 8.103204154161069e-06, "loss": 2.9843, "step": 2670000 }, { "epoch": 16.76, "eval_accuracy": 0.46105567578211487, "eval_loss": 2.909193277359009, "eval_runtime": 1451.2484, "eval_samples_per_second": 57.744, "eval_steps_per_second": 5.775, "step": 2670000 }, { "epoch": 16.77, "learning_rate": 8.095355452022455e-06, "loss": 2.9865, "step": 2670500 }, { "epoch": 16.77, "learning_rate": 8.08750674988384e-06, "loss": 2.9883, "step": 2671000 }, { "epoch": 16.77, "learning_rate": 8.079658047745226e-06, "loss": 2.9816, "step": 2671500 }, { "epoch": 16.78, "learning_rate": 8.071825043010888e-06, "loss": 2.99, "step": 2672000 }, { "epoch": 16.78, "learning_rate": 8.063976340872274e-06, "loss": 2.9774, "step": 2672500 }, { "epoch": 16.78, "learning_rate": 8.05612763873366e-06, "loss": 2.9833, "step": 2673000 }, { "epoch": 16.79, "learning_rate": 8.048278936595045e-06, "loss": 2.9863, "step": 2673500 }, { "epoch": 16.79, "learning_rate": 8.040445931860707e-06, "loss": 2.9829, "step": 2674000 }, { "epoch": 16.79, "learning_rate": 8.032612927126371e-06, "loss": 2.9801, "step": 2674500 }, { "epoch": 16.8, "learning_rate": 8.024764224987756e-06, "loss": 2.9774, "step": 2675000 }, { "epoch": 16.8, "learning_rate": 8.01691552284914e-06, "loss": 2.9825, "step": 2675500 }, { "epoch": 16.8, "learning_rate": 8.009066820710527e-06, "loss": 2.9828, "step": 2676000 }, { "epoch": 16.81, "learning_rate": 8.001218118571914e-06, "loss": 2.9902, "step": 2676500 }, { "epoch": 16.81, "learning_rate": 7.9933694164333e-06, "loss": 2.9786, "step": 2677000 }, { "epoch": 16.81, "learning_rate": 7.985520714294685e-06, "loss": 2.9849, "step": 2677500 }, { "epoch": 16.82, "learning_rate": 7.977672012156071e-06, "loss": 2.989, "step": 2678000 }, { "epoch": 16.82, "learning_rate": 7.969823310017456e-06, "loss": 2.9838, "step": 2678500 }, { "epoch": 16.82, "learning_rate": 7.96197460787884e-06, "loss": 2.9829, "step": 2679000 }, { "epoch": 16.82, "learning_rate": 7.954141603144506e-06, "loss": 2.9887, "step": 2679500 }, { "epoch": 16.83, "learning_rate": 7.94629290100589e-06, "loss": 2.989, "step": 2680000 }, { "epoch": 16.83, "learning_rate": 7.938459896271552e-06, "loss": 2.9853, "step": 2680500 }, { "epoch": 16.83, "learning_rate": 7.93061119413294e-06, "loss": 2.9798, "step": 2681000 }, { "epoch": 16.84, "learning_rate": 7.922762491994325e-06, "loss": 2.9882, "step": 2681500 }, { "epoch": 16.84, "learning_rate": 7.914913789855711e-06, "loss": 2.9816, "step": 2682000 }, { "epoch": 16.84, "learning_rate": 7.907065087717096e-06, "loss": 2.9855, "step": 2682500 }, { "epoch": 16.85, "learning_rate": 7.89921638557848e-06, "loss": 2.9796, "step": 2683000 }, { "epoch": 16.85, "learning_rate": 7.891367683439867e-06, "loss": 2.9815, "step": 2683500 }, { "epoch": 16.85, "learning_rate": 7.883518981301252e-06, "loss": 2.9831, "step": 2684000 }, { "epoch": 16.86, "learning_rate": 7.875685976566915e-06, "loss": 2.9835, "step": 2684500 }, { "epoch": 16.86, "learning_rate": 7.867837274428301e-06, "loss": 2.9796, "step": 2685000 }, { "epoch": 16.86, "learning_rate": 7.859988572289686e-06, "loss": 2.9861, "step": 2685500 }, { "epoch": 16.87, "learning_rate": 7.852139870151072e-06, "loss": 2.9811, "step": 2686000 }, { "epoch": 16.87, "learning_rate": 7.844306865416735e-06, "loss": 2.9875, "step": 2686500 }, { "epoch": 16.87, "learning_rate": 7.83645816327812e-06, "loss": 2.979, "step": 2687000 }, { "epoch": 16.87, "learning_rate": 7.828609461139507e-06, "loss": 2.9817, "step": 2687500 }, { "epoch": 16.88, "learning_rate": 7.820760759000891e-06, "loss": 2.9842, "step": 2688000 }, { "epoch": 16.88, "learning_rate": 7.812927754266556e-06, "loss": 2.9813, "step": 2688500 }, { "epoch": 16.88, "learning_rate": 7.80507905212794e-06, "loss": 2.9895, "step": 2689000 }, { "epoch": 16.89, "learning_rate": 7.797230349989326e-06, "loss": 2.9854, "step": 2689500 }, { "epoch": 16.89, "learning_rate": 7.78939734525499e-06, "loss": 2.9851, "step": 2690000 }, { "epoch": 16.89, "learning_rate": 7.781548643116375e-06, "loss": 2.9781, "step": 2690500 }, { "epoch": 16.9, "learning_rate": 7.77369994097776e-06, "loss": 2.9807, "step": 2691000 }, { "epoch": 16.9, "learning_rate": 7.765851238839146e-06, "loss": 2.9794, "step": 2691500 }, { "epoch": 16.9, "learning_rate": 7.758002536700531e-06, "loss": 2.9861, "step": 2692000 }, { "epoch": 16.91, "learning_rate": 7.750153834561917e-06, "loss": 2.989, "step": 2692500 }, { "epoch": 16.91, "learning_rate": 7.74232082982758e-06, "loss": 2.9818, "step": 2693000 }, { "epoch": 16.91, "learning_rate": 7.734472127688965e-06, "loss": 2.9818, "step": 2693500 }, { "epoch": 16.92, "learning_rate": 7.72663912295463e-06, "loss": 2.9802, "step": 2694000 }, { "epoch": 16.92, "learning_rate": 7.718790420816015e-06, "loss": 2.9882, "step": 2694500 }, { "epoch": 16.92, "learning_rate": 7.7109417186774e-06, "loss": 2.984, "step": 2695000 }, { "epoch": 16.92, "learning_rate": 7.703093016538786e-06, "loss": 2.9862, "step": 2695500 }, { "epoch": 16.93, "learning_rate": 7.69524431440017e-06, "loss": 2.979, "step": 2696000 }, { "epoch": 16.93, "learning_rate": 7.687395612261557e-06, "loss": 2.9899, "step": 2696500 }, { "epoch": 16.93, "learning_rate": 7.679546910122942e-06, "loss": 2.9895, "step": 2697000 }, { "epoch": 16.94, "learning_rate": 7.671698207984328e-06, "loss": 2.9876, "step": 2697500 }, { "epoch": 16.94, "learning_rate": 7.663849505845713e-06, "loss": 2.9825, "step": 2698000 }, { "epoch": 16.94, "learning_rate": 7.6560008037071e-06, "loss": 2.9843, "step": 2698500 }, { "epoch": 16.95, "learning_rate": 7.648167798972762e-06, "loss": 2.9829, "step": 2699000 }, { "epoch": 16.95, "learning_rate": 7.640334794238426e-06, "loss": 2.9811, "step": 2699500 }, { "epoch": 16.95, "learning_rate": 7.63248609209981e-06, "loss": 2.9851, "step": 2700000 }, { "epoch": 16.95, "eval_accuracy": 0.46131656941828963, "eval_loss": 2.9074714183807373, "eval_runtime": 1450.4719, "eval_samples_per_second": 57.775, "eval_steps_per_second": 5.778, "step": 2700000 }, { "epoch": 16.96, "learning_rate": 7.624637389961197e-06, "loss": 2.9851, "step": 2700500 }, { "epoch": 16.96, "learning_rate": 7.616788687822582e-06, "loss": 2.9815, "step": 2701000 }, { "epoch": 16.96, "learning_rate": 7.608939985683967e-06, "loss": 2.9868, "step": 2701500 }, { "epoch": 16.97, "learning_rate": 7.601091283545353e-06, "loss": 2.9834, "step": 2702000 }, { "epoch": 16.97, "learning_rate": 7.593242581406738e-06, "loss": 2.9768, "step": 2702500 }, { "epoch": 16.97, "learning_rate": 7.585393879268124e-06, "loss": 2.9831, "step": 2703000 }, { "epoch": 16.98, "learning_rate": 7.577545177129511e-06, "loss": 2.9813, "step": 2703500 }, { "epoch": 16.98, "learning_rate": 7.569696474990897e-06, "loss": 2.9885, "step": 2704000 }, { "epoch": 16.98, "learning_rate": 7.561847772852282e-06, "loss": 2.9853, "step": 2704500 }, { "epoch": 16.98, "learning_rate": 7.553999070713667e-06, "loss": 2.9858, "step": 2705000 }, { "epoch": 16.99, "learning_rate": 7.546150368575053e-06, "loss": 2.979, "step": 2705500 }, { "epoch": 16.99, "learning_rate": 7.538301666436438e-06, "loss": 2.9858, "step": 2706000 }, { "epoch": 16.99, "learning_rate": 7.530452964297824e-06, "loss": 2.9787, "step": 2706500 }, { "epoch": 17.0, "learning_rate": 7.522604262159209e-06, "loss": 2.9763, "step": 2707000 }, { "epoch": 17.0, "learning_rate": 7.514771257424872e-06, "loss": 2.984, "step": 2707500 }, { "epoch": 17.0, "learning_rate": 7.506938252690536e-06, "loss": 2.9692, "step": 2708000 }, { "epoch": 17.01, "learning_rate": 7.499089550551922e-06, "loss": 2.9714, "step": 2708500 }, { "epoch": 17.01, "learning_rate": 7.491240848413307e-06, "loss": 2.9597, "step": 2709000 }, { "epoch": 17.01, "learning_rate": 7.483392146274692e-06, "loss": 2.9686, "step": 2709500 }, { "epoch": 17.02, "learning_rate": 7.475543444136078e-06, "loss": 2.9753, "step": 2710000 }, { "epoch": 17.02, "learning_rate": 7.467710439401742e-06, "loss": 2.9766, "step": 2710500 }, { "epoch": 17.02, "learning_rate": 7.4598617372631265e-06, "loss": 2.9683, "step": 2711000 }, { "epoch": 17.03, "learning_rate": 7.452013035124512e-06, "loss": 2.9683, "step": 2711500 }, { "epoch": 17.03, "learning_rate": 7.444164332985898e-06, "loss": 2.977, "step": 2712000 }, { "epoch": 17.03, "learning_rate": 7.436315630847283e-06, "loss": 2.9686, "step": 2712500 }, { "epoch": 17.03, "learning_rate": 7.428482626112946e-06, "loss": 2.9658, "step": 2713000 }, { "epoch": 17.04, "learning_rate": 7.420633923974332e-06, "loss": 2.9769, "step": 2713500 }, { "epoch": 17.04, "learning_rate": 7.4127852218357175e-06, "loss": 2.9655, "step": 2714000 }, { "epoch": 17.04, "learning_rate": 7.404936519697103e-06, "loss": 2.9683, "step": 2714500 }, { "epoch": 17.05, "learning_rate": 7.397087817558489e-06, "loss": 2.9798, "step": 2715000 }, { "epoch": 17.05, "learning_rate": 7.389239115419874e-06, "loss": 2.9734, "step": 2715500 }, { "epoch": 17.05, "learning_rate": 7.381406110685537e-06, "loss": 2.9699, "step": 2716000 }, { "epoch": 17.06, "learning_rate": 7.373557408546923e-06, "loss": 2.9696, "step": 2716500 }, { "epoch": 17.06, "learning_rate": 7.3657087064083085e-06, "loss": 2.9769, "step": 2717000 }, { "epoch": 17.06, "learning_rate": 7.357860004269694e-06, "loss": 2.9635, "step": 2717500 }, { "epoch": 17.07, "learning_rate": 7.35001130213108e-06, "loss": 2.9716, "step": 2718000 }, { "epoch": 17.07, "learning_rate": 7.342162599992465e-06, "loss": 2.9695, "step": 2718500 }, { "epoch": 17.07, "learning_rate": 7.334329595258128e-06, "loss": 2.9654, "step": 2719000 }, { "epoch": 17.08, "learning_rate": 7.326480893119514e-06, "loss": 2.9642, "step": 2719500 }, { "epoch": 17.08, "learning_rate": 7.3186321909808995e-06, "loss": 2.9739, "step": 2720000 }, { "epoch": 17.08, "learning_rate": 7.310783488842285e-06, "loss": 2.9772, "step": 2720500 }, { "epoch": 17.09, "learning_rate": 7.302950484107948e-06, "loss": 2.9727, "step": 2721000 }, { "epoch": 17.09, "learning_rate": 7.295101781969334e-06, "loss": 2.9736, "step": 2721500 }, { "epoch": 17.09, "learning_rate": 7.287253079830719e-06, "loss": 2.9714, "step": 2722000 }, { "epoch": 17.09, "learning_rate": 7.279404377692105e-06, "loss": 2.9786, "step": 2722500 }, { "epoch": 17.1, "learning_rate": 7.271571372957768e-06, "loss": 2.9679, "step": 2723000 }, { "epoch": 17.1, "learning_rate": 7.263722670819154e-06, "loss": 2.9717, "step": 2723500 }, { "epoch": 17.1, "learning_rate": 7.255873968680539e-06, "loss": 2.9758, "step": 2724000 }, { "epoch": 17.11, "learning_rate": 7.248025266541925e-06, "loss": 2.9725, "step": 2724500 }, { "epoch": 17.11, "learning_rate": 7.24017656440331e-06, "loss": 2.968, "step": 2725000 }, { "epoch": 17.11, "learning_rate": 7.232327862264696e-06, "loss": 2.9793, "step": 2725500 }, { "epoch": 17.12, "learning_rate": 7.224479160126082e-06, "loss": 2.9642, "step": 2726000 }, { "epoch": 17.12, "learning_rate": 7.216646155391745e-06, "loss": 2.9686, "step": 2726500 }, { "epoch": 17.12, "learning_rate": 7.20879745325313e-06, "loss": 2.9735, "step": 2727000 }, { "epoch": 17.13, "learning_rate": 7.200948751114516e-06, "loss": 2.9728, "step": 2727500 }, { "epoch": 17.13, "learning_rate": 7.193100048975902e-06, "loss": 2.9723, "step": 2728000 }, { "epoch": 17.13, "learning_rate": 7.185251346837288e-06, "loss": 2.984, "step": 2728500 }, { "epoch": 17.14, "learning_rate": 7.177402644698673e-06, "loss": 2.9703, "step": 2729000 }, { "epoch": 17.14, "learning_rate": 7.169553942560059e-06, "loss": 2.9672, "step": 2729500 }, { "epoch": 17.14, "learning_rate": 7.1617052404214446e-06, "loss": 2.9686, "step": 2730000 }, { "epoch": 17.14, "eval_accuracy": 0.4614259963699554, "eval_loss": 2.906648874282837, "eval_runtime": 1450.5566, "eval_samples_per_second": 57.772, "eval_steps_per_second": 5.778, "step": 2730000 }, { "epoch": 17.14, "learning_rate": 7.153872235687108e-06, "loss": 2.9762, "step": 2730500 }, { "epoch": 17.15, "learning_rate": 7.146023533548493e-06, "loss": 2.9715, "step": 2731000 }, { "epoch": 17.15, "learning_rate": 7.138174831409879e-06, "loss": 2.9731, "step": 2731500 }, { "epoch": 17.15, "learning_rate": 7.130326129271264e-06, "loss": 2.9745, "step": 2732000 }, { "epoch": 17.16, "learning_rate": 7.12247742713265e-06, "loss": 2.9787, "step": 2732500 }, { "epoch": 17.16, "learning_rate": 7.1146287249940356e-06, "loss": 2.9715, "step": 2733000 }, { "epoch": 17.16, "learning_rate": 7.106795720259699e-06, "loss": 2.9769, "step": 2733500 }, { "epoch": 17.17, "learning_rate": 7.098947018121084e-06, "loss": 2.9774, "step": 2734000 }, { "epoch": 17.17, "learning_rate": 7.09109831598247e-06, "loss": 2.9754, "step": 2734500 }, { "epoch": 17.17, "learning_rate": 7.083249613843855e-06, "loss": 2.9754, "step": 2735000 }, { "epoch": 17.18, "learning_rate": 7.07540091170524e-06, "loss": 2.9703, "step": 2735500 }, { "epoch": 17.18, "learning_rate": 7.067552209566626e-06, "loss": 2.9731, "step": 2736000 }, { "epoch": 17.18, "learning_rate": 7.059703507428011e-06, "loss": 2.9712, "step": 2736500 }, { "epoch": 17.19, "learning_rate": 7.051854805289397e-06, "loss": 2.9656, "step": 2737000 }, { "epoch": 17.19, "learning_rate": 7.044021800555061e-06, "loss": 2.9761, "step": 2737500 }, { "epoch": 17.19, "learning_rate": 7.0361730984164455e-06, "loss": 2.9794, "step": 2738000 }, { "epoch": 17.19, "learning_rate": 7.028324396277831e-06, "loss": 2.9729, "step": 2738500 }, { "epoch": 17.2, "learning_rate": 7.020475694139217e-06, "loss": 2.9714, "step": 2739000 }, { "epoch": 17.2, "learning_rate": 7.012642689404881e-06, "loss": 2.9714, "step": 2739500 }, { "epoch": 17.2, "learning_rate": 7.004793987266265e-06, "loss": 2.9769, "step": 2740000 }, { "epoch": 17.21, "learning_rate": 6.996945285127651e-06, "loss": 2.9774, "step": 2740500 }, { "epoch": 17.21, "learning_rate": 6.989112280393315e-06, "loss": 2.9764, "step": 2741000 }, { "epoch": 17.21, "learning_rate": 6.9812635782547005e-06, "loss": 2.9765, "step": 2741500 }, { "epoch": 17.22, "learning_rate": 6.973414876116085e-06, "loss": 2.9738, "step": 2742000 }, { "epoch": 17.22, "learning_rate": 6.965566173977471e-06, "loss": 2.9731, "step": 2742500 }, { "epoch": 17.22, "learning_rate": 6.957717471838856e-06, "loss": 2.9719, "step": 2743000 }, { "epoch": 17.23, "learning_rate": 6.949868769700242e-06, "loss": 2.9767, "step": 2743500 }, { "epoch": 17.23, "learning_rate": 6.942020067561629e-06, "loss": 2.9782, "step": 2744000 }, { "epoch": 17.23, "learning_rate": 6.934171365423015e-06, "loss": 2.9721, "step": 2744500 }, { "epoch": 17.24, "learning_rate": 6.9263226632844e-06, "loss": 2.9746, "step": 2745000 }, { "epoch": 17.24, "learning_rate": 6.918473961145785e-06, "loss": 2.9718, "step": 2745500 }, { "epoch": 17.24, "learning_rate": 6.910625259007171e-06, "loss": 2.9772, "step": 2746000 }, { "epoch": 17.25, "learning_rate": 6.902776556868556e-06, "loss": 2.9772, "step": 2746500 }, { "epoch": 17.25, "learning_rate": 6.894943552134219e-06, "loss": 2.9745, "step": 2747000 }, { "epoch": 17.25, "learning_rate": 6.887094849995605e-06, "loss": 2.9774, "step": 2747500 }, { "epoch": 17.25, "learning_rate": 6.879261845261267e-06, "loss": 2.9772, "step": 2748000 }, { "epoch": 17.26, "learning_rate": 6.8714131431226545e-06, "loss": 2.9747, "step": 2748500 }, { "epoch": 17.26, "learning_rate": 6.863564440984039e-06, "loss": 2.9762, "step": 2749000 }, { "epoch": 17.26, "learning_rate": 6.855715738845425e-06, "loss": 2.9689, "step": 2749500 }, { "epoch": 17.27, "learning_rate": 6.847882734111087e-06, "loss": 2.9803, "step": 2750000 }, { "epoch": 17.27, "learning_rate": 6.840034031972474e-06, "loss": 2.9679, "step": 2750500 }, { "epoch": 17.27, "learning_rate": 6.832201027238136e-06, "loss": 2.9771, "step": 2751000 }, { "epoch": 17.28, "learning_rate": 6.824352325099521e-06, "loss": 2.9783, "step": 2751500 }, { "epoch": 17.28, "learning_rate": 6.816503622960907e-06, "loss": 2.9772, "step": 2752000 }, { "epoch": 17.28, "learning_rate": 6.808654920822294e-06, "loss": 2.9755, "step": 2752500 }, { "epoch": 17.29, "learning_rate": 6.80080621868368e-06, "loss": 2.971, "step": 2753000 }, { "epoch": 17.29, "learning_rate": 6.792973213949341e-06, "loss": 2.9675, "step": 2753500 }, { "epoch": 17.29, "learning_rate": 6.785124511810727e-06, "loss": 2.9735, "step": 2754000 }, { "epoch": 17.3, "learning_rate": 6.777275809672112e-06, "loss": 2.9694, "step": 2754500 }, { "epoch": 17.3, "learning_rate": 6.7694271075335e-06, "loss": 2.9666, "step": 2755000 }, { "epoch": 17.3, "learning_rate": 6.761578405394884e-06, "loss": 2.9735, "step": 2755500 }, { "epoch": 17.3, "learning_rate": 6.75372970325627e-06, "loss": 2.9716, "step": 2756000 }, { "epoch": 17.31, "learning_rate": 6.7458810011176555e-06, "loss": 2.9768, "step": 2756500 }, { "epoch": 17.31, "learning_rate": 6.738032298979041e-06, "loss": 2.9745, "step": 2757000 }, { "epoch": 17.31, "learning_rate": 6.730199294244704e-06, "loss": 2.9802, "step": 2757500 }, { "epoch": 17.32, "learning_rate": 6.72235059210609e-06, "loss": 2.974, "step": 2758000 }, { "epoch": 17.32, "learning_rate": 6.714501889967475e-06, "loss": 2.9789, "step": 2758500 }, { "epoch": 17.32, "learning_rate": 6.706653187828861e-06, "loss": 2.9766, "step": 2759000 }, { "epoch": 17.33, "learning_rate": 6.6988044856902465e-06, "loss": 2.9789, "step": 2759500 }, { "epoch": 17.33, "learning_rate": 6.6909714809559096e-06, "loss": 2.974, "step": 2760000 }, { "epoch": 17.33, "eval_accuracy": 0.46154000756551883, "eval_loss": 2.905803918838501, "eval_runtime": 1450.1279, "eval_samples_per_second": 57.789, "eval_steps_per_second": 5.779, "step": 2760000 }, { "epoch": 17.33, "learning_rate": 6.683122778817295e-06, "loss": 2.9754, "step": 2760500 }, { "epoch": 17.34, "learning_rate": 6.675274076678681e-06, "loss": 2.9809, "step": 2761000 }, { "epoch": 17.34, "learning_rate": 6.667425374540066e-06, "loss": 2.9699, "step": 2761500 }, { "epoch": 17.34, "learning_rate": 6.659576672401452e-06, "loss": 2.9784, "step": 2762000 }, { "epoch": 17.35, "learning_rate": 6.651743667667115e-06, "loss": 2.9784, "step": 2762500 }, { "epoch": 17.35, "learning_rate": 6.643894965528501e-06, "loss": 2.9741, "step": 2763000 }, { "epoch": 17.35, "learning_rate": 6.636046263389886e-06, "loss": 2.9808, "step": 2763500 }, { "epoch": 17.36, "learning_rate": 6.628197561251272e-06, "loss": 2.9764, "step": 2764000 }, { "epoch": 17.36, "learning_rate": 6.620364556516935e-06, "loss": 2.9736, "step": 2764500 }, { "epoch": 17.36, "learning_rate": 6.61251585437832e-06, "loss": 2.9739, "step": 2765000 }, { "epoch": 17.36, "learning_rate": 6.604667152239706e-06, "loss": 2.9805, "step": 2765500 }, { "epoch": 17.37, "learning_rate": 6.596818450101092e-06, "loss": 2.9709, "step": 2766000 }, { "epoch": 17.37, "learning_rate": 6.588969747962477e-06, "loss": 2.9746, "step": 2766500 }, { "epoch": 17.37, "learning_rate": 6.581121045823863e-06, "loss": 2.9751, "step": 2767000 }, { "epoch": 17.38, "learning_rate": 6.573272343685248e-06, "loss": 2.9743, "step": 2767500 }, { "epoch": 17.38, "learning_rate": 6.565423641546634e-06, "loss": 2.9753, "step": 2768000 }, { "epoch": 17.38, "learning_rate": 6.55757493940802e-06, "loss": 2.973, "step": 2768500 }, { "epoch": 17.39, "learning_rate": 6.549726237269406e-06, "loss": 2.9728, "step": 2769000 }, { "epoch": 17.39, "learning_rate": 6.5418775351307916e-06, "loss": 2.9825, "step": 2769500 }, { "epoch": 17.39, "learning_rate": 6.534028832992177e-06, "loss": 2.9747, "step": 2770000 }, { "epoch": 17.4, "learning_rate": 6.5261958282578385e-06, "loss": 2.9763, "step": 2770500 }, { "epoch": 17.4, "learning_rate": 6.518362823523502e-06, "loss": 2.9683, "step": 2771000 }, { "epoch": 17.4, "learning_rate": 6.510514121384888e-06, "loss": 2.9707, "step": 2771500 }, { "epoch": 17.41, "learning_rate": 6.502665419246274e-06, "loss": 2.9775, "step": 2772000 }, { "epoch": 17.41, "learning_rate": 6.494816717107659e-06, "loss": 2.9719, "step": 2772500 }, { "epoch": 17.41, "learning_rate": 6.486968014969046e-06, "loss": 2.9765, "step": 2773000 }, { "epoch": 17.41, "learning_rate": 6.479135010234708e-06, "loss": 2.974, "step": 2773500 }, { "epoch": 17.42, "learning_rate": 6.471302005500371e-06, "loss": 2.9715, "step": 2774000 }, { "epoch": 17.42, "learning_rate": 6.4634533033617565e-06, "loss": 2.9696, "step": 2774500 }, { "epoch": 17.42, "learning_rate": 6.455604601223142e-06, "loss": 2.9733, "step": 2775000 }, { "epoch": 17.43, "learning_rate": 6.447755899084528e-06, "loss": 2.9709, "step": 2775500 }, { "epoch": 17.43, "learning_rate": 6.439907196945913e-06, "loss": 2.9675, "step": 2776000 }, { "epoch": 17.43, "learning_rate": 6.432058494807299e-06, "loss": 2.9772, "step": 2776500 }, { "epoch": 17.44, "learning_rate": 6.424225490072962e-06, "loss": 2.9742, "step": 2777000 }, { "epoch": 17.44, "learning_rate": 6.4163767879343475e-06, "loss": 2.9787, "step": 2777500 }, { "epoch": 17.44, "learning_rate": 6.408528085795733e-06, "loss": 2.9698, "step": 2778000 }, { "epoch": 17.45, "learning_rate": 6.400679383657119e-06, "loss": 2.9735, "step": 2778500 }, { "epoch": 17.45, "learning_rate": 6.392830681518503e-06, "loss": 2.9726, "step": 2779000 }, { "epoch": 17.45, "learning_rate": 6.384981979379891e-06, "loss": 2.9859, "step": 2779500 }, { "epoch": 17.46, "learning_rate": 6.377133277241276e-06, "loss": 2.9712, "step": 2780000 }, { "epoch": 17.46, "learning_rate": 6.369284575102662e-06, "loss": 2.9713, "step": 2780500 }, { "epoch": 17.46, "learning_rate": 6.3614358729640475e-06, "loss": 2.9708, "step": 2781000 }, { "epoch": 17.46, "learning_rate": 6.3536028682297105e-06, "loss": 2.984, "step": 2781500 }, { "epoch": 17.47, "learning_rate": 6.345754166091096e-06, "loss": 2.9743, "step": 2782000 }, { "epoch": 17.47, "learning_rate": 6.337905463952482e-06, "loss": 2.9756, "step": 2782500 }, { "epoch": 17.47, "learning_rate": 6.330056761813867e-06, "loss": 2.9759, "step": 2783000 }, { "epoch": 17.48, "learning_rate": 6.322223757079529e-06, "loss": 2.9782, "step": 2783500 }, { "epoch": 17.48, "learning_rate": 6.314375054940916e-06, "loss": 2.9729, "step": 2784000 }, { "epoch": 17.48, "learning_rate": 6.3065263528023015e-06, "loss": 2.975, "step": 2784500 }, { "epoch": 17.49, "learning_rate": 6.298677650663687e-06, "loss": 2.9808, "step": 2785000 }, { "epoch": 17.49, "learning_rate": 6.290828948525073e-06, "loss": 2.9718, "step": 2785500 }, { "epoch": 17.49, "learning_rate": 6.282995943790736e-06, "loss": 2.9808, "step": 2786000 }, { "epoch": 17.5, "learning_rate": 6.275147241652121e-06, "loss": 2.9792, "step": 2786500 }, { "epoch": 17.5, "learning_rate": 6.267298539513507e-06, "loss": 2.9771, "step": 2787000 }, { "epoch": 17.5, "learning_rate": 6.259465534779168e-06, "loss": 2.9759, "step": 2787500 }, { "epoch": 17.51, "learning_rate": 6.251616832640556e-06, "loss": 2.9772, "step": 2788000 }, { "epoch": 17.51, "learning_rate": 6.24376813050194e-06, "loss": 2.979, "step": 2788500 }, { "epoch": 17.51, "learning_rate": 6.235919428363326e-06, "loss": 2.9763, "step": 2789000 }, { "epoch": 17.52, "learning_rate": 6.228070726224712e-06, "loss": 2.9714, "step": 2789500 }, { "epoch": 17.52, "learning_rate": 6.220222024086098e-06, "loss": 2.9701, "step": 2790000 }, { "epoch": 17.52, "eval_accuracy": 0.4617913711068731, "eval_loss": 2.904313564300537, "eval_runtime": 1451.9597, "eval_samples_per_second": 57.716, "eval_steps_per_second": 5.772, "step": 2790000 }, { "epoch": 17.52, "learning_rate": 6.21238901935176e-06, "loss": 2.9755, "step": 2790500 }, { "epoch": 17.52, "learning_rate": 6.204540317213146e-06, "loss": 2.98, "step": 2791000 }, { "epoch": 17.53, "learning_rate": 6.196691615074532e-06, "loss": 2.9792, "step": 2791500 }, { "epoch": 17.53, "learning_rate": 6.188842912935918e-06, "loss": 2.9708, "step": 2792000 }, { "epoch": 17.53, "learning_rate": 6.1809942107973025e-06, "loss": 2.9752, "step": 2792500 }, { "epoch": 17.54, "learning_rate": 6.173145508658688e-06, "loss": 2.9696, "step": 2793000 }, { "epoch": 17.54, "learning_rate": 6.165296806520074e-06, "loss": 2.9788, "step": 2793500 }, { "epoch": 17.54, "learning_rate": 6.15744810438146e-06, "loss": 2.9774, "step": 2794000 }, { "epoch": 17.55, "learning_rate": 6.149615099647122e-06, "loss": 2.9744, "step": 2794500 }, { "epoch": 17.55, "learning_rate": 6.141766397508508e-06, "loss": 2.9776, "step": 2795000 }, { "epoch": 17.55, "learning_rate": 6.1339176953698935e-06, "loss": 2.9739, "step": 2795500 }, { "epoch": 17.56, "learning_rate": 6.126068993231279e-06, "loss": 2.9791, "step": 2796000 }, { "epoch": 17.56, "learning_rate": 6.1182202910926656e-06, "loss": 2.974, "step": 2796500 }, { "epoch": 17.56, "learning_rate": 6.110387286358328e-06, "loss": 2.9753, "step": 2797000 }, { "epoch": 17.57, "learning_rate": 6.102538584219713e-06, "loss": 2.9746, "step": 2797500 }, { "epoch": 17.57, "learning_rate": 6.094689882081099e-06, "loss": 2.969, "step": 2798000 }, { "epoch": 17.57, "learning_rate": 6.086841179942485e-06, "loss": 2.9786, "step": 2798500 }, { "epoch": 17.57, "learning_rate": 6.078992477803871e-06, "loss": 2.98, "step": 2799000 }, { "epoch": 17.58, "learning_rate": 6.071159473069533e-06, "loss": 2.9704, "step": 2799500 }, { "epoch": 17.58, "learning_rate": 6.063310770930919e-06, "loss": 2.975, "step": 2800000 }, { "epoch": 17.58, "learning_rate": 6.055462068792305e-06, "loss": 2.9726, "step": 2800500 }, { "epoch": 17.59, "learning_rate": 6.047613366653691e-06, "loss": 2.9743, "step": 2801000 }, { "epoch": 17.59, "learning_rate": 6.039764664515076e-06, "loss": 2.9801, "step": 2801500 }, { "epoch": 17.59, "learning_rate": 6.031915962376462e-06, "loss": 2.985, "step": 2802000 }, { "epoch": 17.6, "learning_rate": 6.024082957642124e-06, "loss": 2.9666, "step": 2802500 }, { "epoch": 17.6, "learning_rate": 6.016234255503511e-06, "loss": 2.974, "step": 2803000 }, { "epoch": 17.6, "learning_rate": 6.008385553364896e-06, "loss": 2.978, "step": 2803500 }, { "epoch": 17.61, "learning_rate": 6.000536851226282e-06, "loss": 2.9783, "step": 2804000 }, { "epoch": 17.61, "learning_rate": 5.992688149087667e-06, "loss": 2.9728, "step": 2804500 }, { "epoch": 17.61, "learning_rate": 5.9848551443533305e-06, "loss": 2.9768, "step": 2805000 }, { "epoch": 17.62, "learning_rate": 5.977006442214716e-06, "loss": 2.9746, "step": 2805500 }, { "epoch": 17.62, "learning_rate": 5.969157740076102e-06, "loss": 2.9759, "step": 2806000 }, { "epoch": 17.62, "learning_rate": 5.961309037937487e-06, "loss": 2.971, "step": 2806500 }, { "epoch": 17.63, "learning_rate": 5.953460335798872e-06, "loss": 2.9855, "step": 2807000 }, { "epoch": 17.63, "learning_rate": 5.945627331064536e-06, "loss": 2.9722, "step": 2807500 }, { "epoch": 17.63, "learning_rate": 5.9377786289259215e-06, "loss": 2.9735, "step": 2808000 }, { "epoch": 17.63, "learning_rate": 5.929929926787307e-06, "loss": 2.9747, "step": 2808500 }, { "epoch": 17.64, "learning_rate": 5.922081224648692e-06, "loss": 2.9748, "step": 2809000 }, { "epoch": 17.64, "learning_rate": 5.914248219914356e-06, "loss": 2.9794, "step": 2809500 }, { "epoch": 17.64, "learning_rate": 5.906399517775741e-06, "loss": 2.9751, "step": 2810000 }, { "epoch": 17.65, "learning_rate": 5.898550815637127e-06, "loss": 2.9769, "step": 2810500 }, { "epoch": 17.65, "learning_rate": 5.890702113498512e-06, "loss": 2.9879, "step": 2811000 }, { "epoch": 17.65, "learning_rate": 5.882853411359897e-06, "loss": 2.9808, "step": 2811500 }, { "epoch": 17.66, "learning_rate": 5.875004709221284e-06, "loss": 2.977, "step": 2812000 }, { "epoch": 17.66, "learning_rate": 5.867156007082669e-06, "loss": 2.9709, "step": 2812500 }, { "epoch": 17.66, "learning_rate": 5.859307304944055e-06, "loss": 2.975, "step": 2813000 }, { "epoch": 17.67, "learning_rate": 5.851474300209717e-06, "loss": 2.9748, "step": 2813500 }, { "epoch": 17.67, "learning_rate": 5.8436255980711035e-06, "loss": 2.9812, "step": 2814000 }, { "epoch": 17.67, "learning_rate": 5.8357925933367666e-06, "loss": 2.9835, "step": 2814500 }, { "epoch": 17.68, "learning_rate": 5.827943891198151e-06, "loss": 2.9795, "step": 2815000 }, { "epoch": 17.68, "learning_rate": 5.820095189059537e-06, "loss": 2.9705, "step": 2815500 }, { "epoch": 17.68, "learning_rate": 5.8122464869209225e-06, "loss": 2.9812, "step": 2816000 }, { "epoch": 17.68, "learning_rate": 5.804413482186586e-06, "loss": 2.9693, "step": 2816500 }, { "epoch": 17.69, "learning_rate": 5.796564780047971e-06, "loss": 2.9756, "step": 2817000 }, { "epoch": 17.69, "learning_rate": 5.788716077909357e-06, "loss": 2.9704, "step": 2817500 }, { "epoch": 17.69, "learning_rate": 5.780867375770742e-06, "loss": 2.9775, "step": 2818000 }, { "epoch": 17.7, "learning_rate": 5.773018673632129e-06, "loss": 2.9733, "step": 2818500 }, { "epoch": 17.7, "learning_rate": 5.765185668897791e-06, "loss": 2.9832, "step": 2819000 }, { "epoch": 17.7, "learning_rate": 5.7573369667591765e-06, "loss": 2.9734, "step": 2819500 }, { "epoch": 17.71, "learning_rate": 5.749488264620562e-06, "loss": 2.9793, "step": 2820000 }, { "epoch": 17.71, "eval_accuracy": 0.46194352601120403, "eval_loss": 2.902987480163574, "eval_runtime": 1449.6914, "eval_samples_per_second": 57.806, "eval_steps_per_second": 5.781, "step": 2820000 }, { "epoch": 17.71, "learning_rate": 5.7416395624819486e-06, "loss": 2.9829, "step": 2820500 }, { "epoch": 17.71, "learning_rate": 5.733806557747611e-06, "loss": 2.9725, "step": 2821000 }, { "epoch": 17.72, "learning_rate": 5.725973553013274e-06, "loss": 2.9739, "step": 2821500 }, { "epoch": 17.72, "learning_rate": 5.7181248508746594e-06, "loss": 2.9725, "step": 2822000 }, { "epoch": 17.72, "learning_rate": 5.710276148736046e-06, "loss": 2.9687, "step": 2822500 }, { "epoch": 17.73, "learning_rate": 5.702427446597431e-06, "loss": 2.9801, "step": 2823000 }, { "epoch": 17.73, "learning_rate": 5.694578744458816e-06, "loss": 2.9746, "step": 2823500 }, { "epoch": 17.73, "learning_rate": 5.686730042320202e-06, "loss": 2.9763, "step": 2824000 }, { "epoch": 17.73, "learning_rate": 5.678881340181587e-06, "loss": 2.9756, "step": 2824500 }, { "epoch": 17.74, "learning_rate": 5.671032638042974e-06, "loss": 2.9756, "step": 2825000 }, { "epoch": 17.74, "learning_rate": 5.663183935904359e-06, "loss": 2.9802, "step": 2825500 }, { "epoch": 17.74, "learning_rate": 5.655335233765745e-06, "loss": 2.9669, "step": 2826000 }, { "epoch": 17.75, "learning_rate": 5.647486531627131e-06, "loss": 2.9782, "step": 2826500 }, { "epoch": 17.75, "learning_rate": 5.639637829488516e-06, "loss": 2.9794, "step": 2827000 }, { "epoch": 17.75, "learning_rate": 5.631804824754179e-06, "loss": 2.9771, "step": 2827500 }, { "epoch": 17.76, "learning_rate": 5.6239718200198414e-06, "loss": 2.98, "step": 2828000 }, { "epoch": 17.76, "learning_rate": 5.616123117881227e-06, "loss": 2.9708, "step": 2828500 }, { "epoch": 17.76, "learning_rate": 5.608290113146891e-06, "loss": 2.979, "step": 2829000 }, { "epoch": 17.77, "learning_rate": 5.600457108412553e-06, "loss": 2.9742, "step": 2829500 }, { "epoch": 17.77, "learning_rate": 5.592608406273939e-06, "loss": 2.978, "step": 2830000 }, { "epoch": 17.77, "learning_rate": 5.584759704135324e-06, "loss": 2.9754, "step": 2830500 }, { "epoch": 17.78, "learning_rate": 5.576911001996711e-06, "loss": 2.979, "step": 2831000 }, { "epoch": 17.78, "learning_rate": 5.5690622998580955e-06, "loss": 2.9711, "step": 2831500 }, { "epoch": 17.78, "learning_rate": 5.561229295123759e-06, "loss": 2.9709, "step": 2832000 }, { "epoch": 17.79, "learning_rate": 5.553380592985144e-06, "loss": 2.9766, "step": 2832500 }, { "epoch": 17.79, "learning_rate": 5.545531890846531e-06, "loss": 2.9805, "step": 2833000 }, { "epoch": 17.79, "learning_rate": 5.537683188707915e-06, "loss": 2.9747, "step": 2833500 }, { "epoch": 17.79, "learning_rate": 5.529834486569301e-06, "loss": 2.9827, "step": 2834000 }, { "epoch": 17.8, "learning_rate": 5.5219857844306865e-06, "loss": 2.98, "step": 2834500 }, { "epoch": 17.8, "learning_rate": 5.514137082292072e-06, "loss": 2.9773, "step": 2835000 }, { "epoch": 17.8, "learning_rate": 5.506304077557735e-06, "loss": 2.9738, "step": 2835500 }, { "epoch": 17.81, "learning_rate": 5.498455375419121e-06, "loss": 2.9725, "step": 2836000 }, { "epoch": 17.81, "learning_rate": 5.490606673280506e-06, "loss": 2.9763, "step": 2836500 }, { "epoch": 17.81, "learning_rate": 5.482757971141892e-06, "loss": 2.9793, "step": 2837000 }, { "epoch": 17.82, "learning_rate": 5.4749092690032775e-06, "loss": 2.9761, "step": 2837500 }, { "epoch": 17.82, "learning_rate": 5.467060566864664e-06, "loss": 2.9699, "step": 2838000 }, { "epoch": 17.82, "learning_rate": 5.4592118647260496e-06, "loss": 2.9822, "step": 2838500 }, { "epoch": 17.83, "learning_rate": 5.451363162587435e-06, "loss": 2.977, "step": 2839000 }, { "epoch": 17.83, "learning_rate": 5.44351446044882e-06, "loss": 2.9714, "step": 2839500 }, { "epoch": 17.83, "learning_rate": 5.4356657583102055e-06, "loss": 2.9755, "step": 2840000 }, { "epoch": 17.84, "learning_rate": 5.427817056171592e-06, "loss": 2.9751, "step": 2840500 }, { "epoch": 17.84, "learning_rate": 5.4199683540329775e-06, "loss": 2.9769, "step": 2841000 }, { "epoch": 17.84, "learning_rate": 5.412119651894363e-06, "loss": 2.9795, "step": 2841500 }, { "epoch": 17.84, "learning_rate": 5.404270949755749e-06, "loss": 2.9726, "step": 2842000 }, { "epoch": 17.85, "learning_rate": 5.396422247617134e-06, "loss": 2.9764, "step": 2842500 }, { "epoch": 17.85, "learning_rate": 5.38857354547852e-06, "loss": 2.9732, "step": 2843000 }, { "epoch": 17.85, "learning_rate": 5.380740540744183e-06, "loss": 2.9734, "step": 2843500 }, { "epoch": 17.86, "learning_rate": 5.3728918386055685e-06, "loss": 2.9748, "step": 2844000 }, { "epoch": 17.86, "learning_rate": 5.365058833871231e-06, "loss": 2.9748, "step": 2844500 }, { "epoch": 17.86, "learning_rate": 5.357210131732617e-06, "loss": 2.9766, "step": 2845000 }, { "epoch": 17.87, "learning_rate": 5.349361429594003e-06, "loss": 2.9706, "step": 2845500 }, { "epoch": 17.87, "learning_rate": 5.341512727455388e-06, "loss": 2.9753, "step": 2846000 }, { "epoch": 17.87, "learning_rate": 5.333664025316774e-06, "loss": 2.9729, "step": 2846500 }, { "epoch": 17.88, "learning_rate": 5.3258153231781595e-06, "loss": 2.9772, "step": 2847000 }, { "epoch": 17.88, "learning_rate": 5.317966621039545e-06, "loss": 2.9759, "step": 2847500 }, { "epoch": 17.88, "learning_rate": 5.310133616305208e-06, "loss": 2.9767, "step": 2848000 }, { "epoch": 17.89, "learning_rate": 5.302284914166594e-06, "loss": 2.9811, "step": 2848500 }, { "epoch": 17.89, "learning_rate": 5.294436212027979e-06, "loss": 2.9731, "step": 2849000 }, { "epoch": 17.89, "learning_rate": 5.286587509889365e-06, "loss": 2.9727, "step": 2849500 }, { "epoch": 17.9, "learning_rate": 5.278754505155028e-06, "loss": 2.976, "step": 2850000 }, { "epoch": 17.9, "eval_accuracy": 0.46209687071929456, "eval_loss": 2.9014880657196045, "eval_runtime": 1451.2482, "eval_samples_per_second": 57.744, "eval_steps_per_second": 5.775, "step": 2850000 }, { "epoch": 17.9, "learning_rate": 5.270905803016414e-06, "loss": 2.975, "step": 2850500 }, { "epoch": 17.9, "learning_rate": 5.263057100877799e-06, "loss": 2.9744, "step": 2851000 }, { "epoch": 17.9, "learning_rate": 5.255208398739185e-06, "loss": 2.9817, "step": 2851500 }, { "epoch": 17.91, "learning_rate": 5.247375394004848e-06, "loss": 2.9761, "step": 2852000 }, { "epoch": 17.91, "learning_rate": 5.239526691866233e-06, "loss": 2.9726, "step": 2852500 }, { "epoch": 17.91, "learning_rate": 5.231677989727619e-06, "loss": 2.9689, "step": 2853000 }, { "epoch": 17.92, "learning_rate": 5.223829287589005e-06, "loss": 2.9741, "step": 2853500 }, { "epoch": 17.92, "learning_rate": 5.21598058545039e-06, "loss": 2.9779, "step": 2854000 }, { "epoch": 17.92, "learning_rate": 5.208147580716053e-06, "loss": 2.974, "step": 2854500 }, { "epoch": 17.93, "learning_rate": 5.200298878577439e-06, "loss": 2.9739, "step": 2855000 }, { "epoch": 17.93, "learning_rate": 5.192450176438824e-06, "loss": 2.9713, "step": 2855500 }, { "epoch": 17.93, "learning_rate": 5.18460147430021e-06, "loss": 2.967, "step": 2856000 }, { "epoch": 17.94, "learning_rate": 5.176752772161596e-06, "loss": 2.976, "step": 2856500 }, { "epoch": 17.94, "learning_rate": 5.168919767427259e-06, "loss": 2.9795, "step": 2857000 }, { "epoch": 17.94, "learning_rate": 5.161071065288644e-06, "loss": 2.978, "step": 2857500 }, { "epoch": 17.95, "learning_rate": 5.15322236315003e-06, "loss": 2.9677, "step": 2858000 }, { "epoch": 17.95, "learning_rate": 5.1453736610114154e-06, "loss": 2.9784, "step": 2858500 }, { "epoch": 17.95, "learning_rate": 5.137524958872801e-06, "loss": 2.9646, "step": 2859000 }, { "epoch": 17.95, "learning_rate": 5.129676256734187e-06, "loss": 2.9732, "step": 2859500 }, { "epoch": 17.96, "learning_rate": 5.121843251999849e-06, "loss": 2.9771, "step": 2860000 }, { "epoch": 17.96, "learning_rate": 5.114010247265513e-06, "loss": 2.9786, "step": 2860500 }, { "epoch": 17.96, "learning_rate": 5.106161545126898e-06, "loss": 2.9692, "step": 2861000 }, { "epoch": 17.97, "learning_rate": 5.098312842988284e-06, "loss": 2.9794, "step": 2861500 }, { "epoch": 17.97, "learning_rate": 5.0904641408496695e-06, "loss": 2.9763, "step": 2862000 }, { "epoch": 17.97, "learning_rate": 5.082615438711055e-06, "loss": 2.9726, "step": 2862500 }, { "epoch": 17.98, "learning_rate": 5.074766736572441e-06, "loss": 2.9697, "step": 2863000 }, { "epoch": 17.98, "learning_rate": 5.066918034433826e-06, "loss": 2.974, "step": 2863500 }, { "epoch": 17.98, "learning_rate": 5.059069332295212e-06, "loss": 2.9751, "step": 2864000 }, { "epoch": 17.99, "learning_rate": 5.051236327560875e-06, "loss": 2.9761, "step": 2864500 }, { "epoch": 17.99, "learning_rate": 5.0433876254222605e-06, "loss": 2.9757, "step": 2865000 }, { "epoch": 17.99, "learning_rate": 5.035538923283646e-06, "loss": 2.9719, "step": 2865500 }, { "epoch": 18.0, "learning_rate": 5.027690221145032e-06, "loss": 2.9811, "step": 2866000 }, { "epoch": 18.0, "learning_rate": 5.019857216410694e-06, "loss": 2.9736, "step": 2866500 }, { "epoch": 18.0, "learning_rate": 5.01200851427208e-06, "loss": 2.972, "step": 2867000 }, { "epoch": 18.0, "learning_rate": 5.004159812133466e-06, "loss": 2.9694, "step": 2867500 }, { "epoch": 18.01, "learning_rate": 4.9963111099948515e-06, "loss": 2.9618, "step": 2868000 }, { "epoch": 18.01, "learning_rate": 4.988462407856237e-06, "loss": 2.9638, "step": 2868500 }, { "epoch": 18.01, "learning_rate": 4.980613705717623e-06, "loss": 2.9755, "step": 2869000 }, { "epoch": 18.02, "learning_rate": 4.972765003579008e-06, "loss": 2.9648, "step": 2869500 }, { "epoch": 18.02, "learning_rate": 4.964916301440394e-06, "loss": 2.9625, "step": 2870000 }, { "epoch": 18.02, "learning_rate": 4.9570675993017795e-06, "loss": 2.9684, "step": 2870500 }, { "epoch": 18.03, "learning_rate": 4.9492345945674425e-06, "loss": 2.9631, "step": 2871000 }, { "epoch": 18.03, "learning_rate": 4.941385892428828e-06, "loss": 2.9625, "step": 2871500 }, { "epoch": 18.03, "learning_rate": 4.933537190290214e-06, "loss": 2.964, "step": 2872000 }, { "epoch": 18.04, "learning_rate": 4.925704185555877e-06, "loss": 2.9622, "step": 2872500 }, { "epoch": 18.04, "learning_rate": 4.917855483417262e-06, "loss": 2.9628, "step": 2873000 }, { "epoch": 18.04, "learning_rate": 4.910006781278648e-06, "loss": 2.9707, "step": 2873500 }, { "epoch": 18.05, "learning_rate": 4.9021580791400335e-06, "loss": 2.9641, "step": 2874000 }, { "epoch": 18.05, "learning_rate": 4.894309377001419e-06, "loss": 2.9592, "step": 2874500 }, { "epoch": 18.05, "learning_rate": 4.886460674862805e-06, "loss": 2.9644, "step": 2875000 }, { "epoch": 18.06, "learning_rate": 4.87861197272419e-06, "loss": 2.9685, "step": 2875500 }, { "epoch": 18.06, "learning_rate": 4.870763270585576e-06, "loss": 2.9658, "step": 2876000 }, { "epoch": 18.06, "learning_rate": 4.862914568446962e-06, "loss": 2.9602, "step": 2876500 }, { "epoch": 18.06, "learning_rate": 4.855065866308348e-06, "loss": 2.9649, "step": 2877000 }, { "epoch": 18.07, "learning_rate": 4.8472171641697335e-06, "loss": 2.9682, "step": 2877500 }, { "epoch": 18.07, "learning_rate": 4.839368462031118e-06, "loss": 2.9671, "step": 2878000 }, { "epoch": 18.07, "learning_rate": 4.831535457296782e-06, "loss": 2.9634, "step": 2878500 }, { "epoch": 18.08, "learning_rate": 4.823686755158168e-06, "loss": 2.963, "step": 2879000 }, { "epoch": 18.08, "learning_rate": 4.815838053019553e-06, "loss": 2.9607, "step": 2879500 }, { "epoch": 18.08, "learning_rate": 4.807989350880938e-06, "loss": 2.9691, "step": 2880000 }, { "epoch": 18.08, "eval_accuracy": 0.4623538100076797, "eval_loss": 2.900641441345215, "eval_runtime": 1449.8261, "eval_samples_per_second": 57.801, "eval_steps_per_second": 5.781, "step": 2880000 }, { "epoch": 18.09, "learning_rate": 4.800156346146602e-06, "loss": 2.9657, "step": 2880500 }, { "epoch": 18.09, "learning_rate": 4.7923076440079876e-06, "loss": 2.9668, "step": 2881000 }, { "epoch": 18.09, "learning_rate": 4.784458941869373e-06, "loss": 2.9574, "step": 2881500 }, { "epoch": 18.1, "learning_rate": 4.776610239730758e-06, "loss": 2.9714, "step": 2882000 }, { "epoch": 18.1, "learning_rate": 4.7687615375921435e-06, "loss": 2.96, "step": 2882500 }, { "epoch": 18.1, "learning_rate": 4.76091283545353e-06, "loss": 2.9694, "step": 2883000 }, { "epoch": 18.11, "learning_rate": 4.753079830719193e-06, "loss": 2.9695, "step": 2883500 }, { "epoch": 18.11, "learning_rate": 4.745231128580578e-06, "loss": 2.9683, "step": 2884000 }, { "epoch": 18.11, "learning_rate": 4.737382426441963e-06, "loss": 2.9609, "step": 2884500 }, { "epoch": 18.11, "learning_rate": 4.729549421707627e-06, "loss": 2.9633, "step": 2885000 }, { "epoch": 18.12, "learning_rate": 4.721700719569013e-06, "loss": 2.9664, "step": 2885500 }, { "epoch": 18.12, "learning_rate": 4.7138520174303975e-06, "loss": 2.9609, "step": 2886000 }, { "epoch": 18.12, "learning_rate": 4.706003315291783e-06, "loss": 2.9695, "step": 2886500 }, { "epoch": 18.13, "learning_rate": 4.698154613153169e-06, "loss": 2.9686, "step": 2887000 }, { "epoch": 18.13, "learning_rate": 4.690305911014555e-06, "loss": 2.9621, "step": 2887500 }, { "epoch": 18.13, "learning_rate": 4.682457208875941e-06, "loss": 2.969, "step": 2888000 }, { "epoch": 18.14, "learning_rate": 4.674608506737326e-06, "loss": 2.9657, "step": 2888500 }, { "epoch": 18.14, "learning_rate": 4.666759804598712e-06, "loss": 2.962, "step": 2889000 }, { "epoch": 18.14, "learning_rate": 4.658926799864375e-06, "loss": 2.9663, "step": 2889500 }, { "epoch": 18.15, "learning_rate": 4.651078097725761e-06, "loss": 2.9681, "step": 2890000 }, { "epoch": 18.15, "learning_rate": 4.643229395587146e-06, "loss": 2.9673, "step": 2890500 }, { "epoch": 18.15, "learning_rate": 4.635380693448532e-06, "loss": 2.9708, "step": 2891000 }, { "epoch": 18.16, "learning_rate": 4.627547688714194e-06, "loss": 2.9775, "step": 2891500 }, { "epoch": 18.16, "learning_rate": 4.61969898657558e-06, "loss": 2.965, "step": 2892000 }, { "epoch": 18.16, "learning_rate": 4.611850284436966e-06, "loss": 2.9737, "step": 2892500 }, { "epoch": 18.17, "learning_rate": 4.604001582298352e-06, "loss": 2.9669, "step": 2893000 }, { "epoch": 18.17, "learning_rate": 4.596152880159737e-06, "loss": 2.9672, "step": 2893500 }, { "epoch": 18.17, "learning_rate": 4.588304178021123e-06, "loss": 2.968, "step": 2894000 }, { "epoch": 18.17, "learning_rate": 4.580471173286786e-06, "loss": 2.9668, "step": 2894500 }, { "epoch": 18.18, "learning_rate": 4.572622471148171e-06, "loss": 2.9616, "step": 2895000 }, { "epoch": 18.18, "learning_rate": 4.564773769009557e-06, "loss": 2.9612, "step": 2895500 }, { "epoch": 18.18, "learning_rate": 4.556925066870943e-06, "loss": 2.9713, "step": 2896000 }, { "epoch": 18.19, "learning_rate": 4.549076364732328e-06, "loss": 2.9616, "step": 2896500 }, { "epoch": 18.19, "learning_rate": 4.541243359997991e-06, "loss": 2.9626, "step": 2897000 }, { "epoch": 18.19, "learning_rate": 4.533394657859377e-06, "loss": 2.964, "step": 2897500 }, { "epoch": 18.2, "learning_rate": 4.525545955720762e-06, "loss": 2.9642, "step": 2898000 }, { "epoch": 18.2, "learning_rate": 4.517697253582147e-06, "loss": 2.9676, "step": 2898500 }, { "epoch": 18.2, "learning_rate": 4.509848551443534e-06, "loss": 2.9641, "step": 2899000 }, { "epoch": 18.21, "learning_rate": 4.502015546709197e-06, "loss": 2.9595, "step": 2899500 }, { "epoch": 18.21, "learning_rate": 4.494166844570582e-06, "loss": 2.9652, "step": 2900000 }, { "epoch": 18.21, "learning_rate": 4.486318142431967e-06, "loss": 2.9612, "step": 2900500 }, { "epoch": 18.22, "learning_rate": 4.4784694402933534e-06, "loss": 2.9661, "step": 2901000 }, { "epoch": 18.22, "learning_rate": 4.470620738154739e-06, "loss": 2.9682, "step": 2901500 }, { "epoch": 18.22, "learning_rate": 4.462787733420402e-06, "loss": 2.9688, "step": 2902000 }, { "epoch": 18.22, "learning_rate": 4.454939031281787e-06, "loss": 2.9643, "step": 2902500 }, { "epoch": 18.23, "learning_rate": 4.447090329143173e-06, "loss": 2.972, "step": 2903000 }, { "epoch": 18.23, "learning_rate": 4.439257324408836e-06, "loss": 2.971, "step": 2903500 }, { "epoch": 18.23, "learning_rate": 4.431408622270222e-06, "loss": 2.9641, "step": 2904000 }, { "epoch": 18.24, "learning_rate": 4.423559920131607e-06, "loss": 2.9626, "step": 2904500 }, { "epoch": 18.24, "learning_rate": 4.415711217992992e-06, "loss": 2.9622, "step": 2905000 }, { "epoch": 18.24, "learning_rate": 4.407862515854379e-06, "loss": 2.9617, "step": 2905500 }, { "epoch": 18.25, "learning_rate": 4.400013813715764e-06, "loss": 2.9638, "step": 2906000 }, { "epoch": 18.25, "learning_rate": 4.39216511157715e-06, "loss": 2.9642, "step": 2906500 }, { "epoch": 18.25, "learning_rate": 4.3843164094385354e-06, "loss": 2.968, "step": 2907000 }, { "epoch": 18.26, "learning_rate": 4.376467707299921e-06, "loss": 2.9672, "step": 2907500 }, { "epoch": 18.26, "learning_rate": 4.368619005161307e-06, "loss": 2.9679, "step": 2908000 }, { "epoch": 18.26, "learning_rate": 4.36078600042697e-06, "loss": 2.9645, "step": 2908500 }, { "epoch": 18.27, "learning_rate": 4.352937298288355e-06, "loss": 2.9706, "step": 2909000 }, { "epoch": 18.27, "learning_rate": 4.345088596149741e-06, "loss": 2.9638, "step": 2909500 }, { "epoch": 18.27, "learning_rate": 4.3372398940111265e-06, "loss": 2.9697, "step": 2910000 }, { "epoch": 18.27, "eval_accuracy": 0.4625318606408985, "eval_loss": 2.8990705013275146, "eval_runtime": 1451.3196, "eval_samples_per_second": 57.741, "eval_steps_per_second": 5.775, "step": 2910000 }, { "epoch": 18.27, "learning_rate": 4.329391191872512e-06, "loss": 2.9649, "step": 2910500 }, { "epoch": 18.28, "learning_rate": 4.321558187138175e-06, "loss": 2.9673, "step": 2911000 }, { "epoch": 18.28, "learning_rate": 4.313709484999561e-06, "loss": 2.9651, "step": 2911500 }, { "epoch": 18.28, "learning_rate": 4.305860782860946e-06, "loss": 2.9722, "step": 2912000 }, { "epoch": 18.29, "learning_rate": 4.298012080722332e-06, "loss": 2.9657, "step": 2912500 }, { "epoch": 18.29, "learning_rate": 4.2901633785837175e-06, "loss": 2.9689, "step": 2913000 }, { "epoch": 18.29, "learning_rate": 4.282314676445103e-06, "loss": 2.9538, "step": 2913500 }, { "epoch": 18.3, "learning_rate": 4.274481671710766e-06, "loss": 2.9732, "step": 2914000 }, { "epoch": 18.3, "learning_rate": 4.266632969572152e-06, "loss": 2.9705, "step": 2914500 }, { "epoch": 18.3, "learning_rate": 4.258784267433537e-06, "loss": 2.9644, "step": 2915000 }, { "epoch": 18.31, "learning_rate": 4.250935565294923e-06, "loss": 2.9674, "step": 2915500 }, { "epoch": 18.31, "learning_rate": 4.2430868631563085e-06, "loss": 2.958, "step": 2916000 }, { "epoch": 18.31, "learning_rate": 4.2352538584219715e-06, "loss": 2.965, "step": 2916500 }, { "epoch": 18.32, "learning_rate": 4.227405156283357e-06, "loss": 2.9646, "step": 2917000 }, { "epoch": 18.32, "learning_rate": 4.219556454144743e-06, "loss": 2.9727, "step": 2917500 }, { "epoch": 18.32, "learning_rate": 4.211707752006128e-06, "loss": 2.9639, "step": 2918000 }, { "epoch": 18.33, "learning_rate": 4.203859049867514e-06, "loss": 2.9635, "step": 2918500 }, { "epoch": 18.33, "learning_rate": 4.196026045133177e-06, "loss": 2.9697, "step": 2919000 }, { "epoch": 18.33, "learning_rate": 4.1881773429945625e-06, "loss": 2.9661, "step": 2919500 }, { "epoch": 18.33, "learning_rate": 4.180328640855948e-06, "loss": 2.9657, "step": 2920000 }, { "epoch": 18.34, "learning_rate": 4.172495636121611e-06, "loss": 2.9726, "step": 2920500 }, { "epoch": 18.34, "learning_rate": 4.164646933982997e-06, "loss": 2.9767, "step": 2921000 }, { "epoch": 18.34, "learning_rate": 4.156798231844382e-06, "loss": 2.9639, "step": 2921500 }, { "epoch": 18.35, "learning_rate": 4.148949529705768e-06, "loss": 2.9729, "step": 2922000 }, { "epoch": 18.35, "learning_rate": 4.1411008275671535e-06, "loss": 2.9571, "step": 2922500 }, { "epoch": 18.35, "learning_rate": 4.133252125428539e-06, "loss": 2.9647, "step": 2923000 }, { "epoch": 18.36, "learning_rate": 4.1254034232899256e-06, "loss": 2.9703, "step": 2923500 }, { "epoch": 18.36, "learning_rate": 4.117570418555588e-06, "loss": 2.9701, "step": 2924000 }, { "epoch": 18.36, "learning_rate": 4.109721716416973e-06, "loss": 2.9595, "step": 2924500 }, { "epoch": 18.37, "learning_rate": 4.101873014278359e-06, "loss": 2.9685, "step": 2925000 }, { "epoch": 18.37, "learning_rate": 4.094024312139745e-06, "loss": 2.9672, "step": 2925500 }, { "epoch": 18.37, "learning_rate": 4.086175610001131e-06, "loss": 2.9701, "step": 2926000 }, { "epoch": 18.38, "learning_rate": 4.078326907862516e-06, "loss": 2.9705, "step": 2926500 }, { "epoch": 18.38, "learning_rate": 4.070478205723901e-06, "loss": 2.9638, "step": 2927000 }, { "epoch": 18.38, "learning_rate": 4.062629503585287e-06, "loss": 2.9697, "step": 2927500 }, { "epoch": 18.38, "learning_rate": 4.054780801446673e-06, "loss": 2.969, "step": 2928000 }, { "epoch": 18.39, "learning_rate": 4.046932099308059e-06, "loss": 2.9706, "step": 2928500 }, { "epoch": 18.39, "learning_rate": 4.0390833971694445e-06, "loss": 2.9716, "step": 2929000 }, { "epoch": 18.39, "learning_rate": 4.03123469503083e-06, "loss": 2.966, "step": 2929500 }, { "epoch": 18.4, "learning_rate": 4.023401690296492e-06, "loss": 2.9663, "step": 2930000 }, { "epoch": 18.4, "learning_rate": 4.015552988157879e-06, "loss": 2.9656, "step": 2930500 }, { "epoch": 18.4, "learning_rate": 4.007704286019264e-06, "loss": 2.9631, "step": 2931000 }, { "epoch": 18.41, "learning_rate": 3.99985558388065e-06, "loss": 2.9675, "step": 2931500 }, { "epoch": 18.41, "learning_rate": 3.992022579146312e-06, "loss": 2.9606, "step": 2932000 }, { "epoch": 18.41, "learning_rate": 3.984205271816253e-06, "loss": 2.9666, "step": 2932500 }, { "epoch": 18.42, "learning_rate": 3.976356569677638e-06, "loss": 2.9682, "step": 2933000 }, { "epoch": 18.42, "learning_rate": 3.968507867539024e-06, "loss": 2.9667, "step": 2933500 }, { "epoch": 18.42, "learning_rate": 3.9606591654004095e-06, "loss": 2.9765, "step": 2934000 }, { "epoch": 18.43, "learning_rate": 3.952810463261795e-06, "loss": 2.9626, "step": 2934500 }, { "epoch": 18.43, "learning_rate": 3.944961761123181e-06, "loss": 2.9623, "step": 2935000 }, { "epoch": 18.43, "learning_rate": 3.937113058984566e-06, "loss": 2.9605, "step": 2935500 }, { "epoch": 18.44, "learning_rate": 3.929264356845952e-06, "loss": 2.9632, "step": 2936000 }, { "epoch": 18.44, "learning_rate": 3.921415654707337e-06, "loss": 2.9676, "step": 2936500 }, { "epoch": 18.44, "learning_rate": 3.913566952568724e-06, "loss": 2.9607, "step": 2937000 }, { "epoch": 18.44, "learning_rate": 3.905718250430109e-06, "loss": 2.9559, "step": 2937500 }, { "epoch": 18.45, "learning_rate": 3.897869548291495e-06, "loss": 2.9649, "step": 2938000 }, { "epoch": 18.45, "learning_rate": 3.890036543557157e-06, "loss": 2.9659, "step": 2938500 }, { "epoch": 18.45, "learning_rate": 3.882187841418544e-06, "loss": 2.9686, "step": 2939000 }, { "epoch": 18.46, "learning_rate": 3.874339139279929e-06, "loss": 2.9683, "step": 2939500 }, { "epoch": 18.46, "learning_rate": 3.8665061345455915e-06, "loss": 2.9702, "step": 2940000 }, { "epoch": 18.46, "eval_accuracy": 0.4626719775307193, "eval_loss": 2.8983864784240723, "eval_runtime": 1450.4102, "eval_samples_per_second": 57.777, "eval_steps_per_second": 5.778, "step": 2940000 }, { "epoch": 18.46, "learning_rate": 3.858657432406977e-06, "loss": 2.9768, "step": 2940500 }, { "epoch": 18.47, "learning_rate": 3.8508087302683635e-06, "loss": 2.9694, "step": 2941000 }, { "epoch": 18.47, "learning_rate": 3.842960028129749e-06, "loss": 2.9676, "step": 2941500 }, { "epoch": 18.47, "learning_rate": 3.835111325991135e-06, "loss": 2.9667, "step": 2942000 }, { "epoch": 18.48, "learning_rate": 3.82726262385252e-06, "loss": 2.9624, "step": 2942500 }, { "epoch": 18.48, "learning_rate": 3.819413921713905e-06, "loss": 2.9674, "step": 2943000 }, { "epoch": 18.48, "learning_rate": 3.8115652195752914e-06, "loss": 2.9618, "step": 2943500 }, { "epoch": 18.49, "learning_rate": 3.803716517436677e-06, "loss": 2.9737, "step": 2944000 }, { "epoch": 18.49, "learning_rate": 3.7958678152980626e-06, "loss": 2.9702, "step": 2944500 }, { "epoch": 18.49, "learning_rate": 3.7880348105637253e-06, "loss": 2.9667, "step": 2945000 }, { "epoch": 18.49, "learning_rate": 3.7802018058293883e-06, "loss": 2.9688, "step": 2945500 }, { "epoch": 18.5, "learning_rate": 3.772353103690774e-06, "loss": 2.9671, "step": 2946000 }, { "epoch": 18.5, "learning_rate": 3.7645044015521595e-06, "loss": 2.9653, "step": 2946500 }, { "epoch": 18.5, "learning_rate": 3.756655699413545e-06, "loss": 2.969, "step": 2947000 }, { "epoch": 18.51, "learning_rate": 3.7488069972749307e-06, "loss": 2.9697, "step": 2947500 }, { "epoch": 18.51, "learning_rate": 3.7409582951363167e-06, "loss": 2.964, "step": 2948000 }, { "epoch": 18.51, "learning_rate": 3.7331095929977023e-06, "loss": 2.9633, "step": 2948500 }, { "epoch": 18.52, "learning_rate": 3.725260890859088e-06, "loss": 2.9698, "step": 2949000 }, { "epoch": 18.52, "learning_rate": 3.7174121887204734e-06, "loss": 2.9607, "step": 2949500 }, { "epoch": 18.52, "learning_rate": 3.7095791839861365e-06, "loss": 2.9716, "step": 2950000 }, { "epoch": 18.53, "learning_rate": 3.701730481847522e-06, "loss": 2.9647, "step": 2950500 }, { "epoch": 18.53, "learning_rate": 3.6938817797089077e-06, "loss": 2.9732, "step": 2951000 }, { "epoch": 18.53, "learning_rate": 3.6860330775702933e-06, "loss": 2.9665, "step": 2951500 }, { "epoch": 18.54, "learning_rate": 3.6781843754316784e-06, "loss": 2.9632, "step": 2952000 }, { "epoch": 18.54, "learning_rate": 3.670335673293064e-06, "loss": 2.9692, "step": 2952500 }, { "epoch": 18.54, "learning_rate": 3.6624869711544505e-06, "loss": 2.9641, "step": 2953000 }, { "epoch": 18.54, "learning_rate": 3.6546382690158356e-06, "loss": 2.9636, "step": 2953500 }, { "epoch": 18.55, "learning_rate": 3.6468052642814983e-06, "loss": 2.9688, "step": 2954000 }, { "epoch": 18.55, "learning_rate": 3.6389722595471618e-06, "loss": 2.9674, "step": 2954500 }, { "epoch": 18.55, "learning_rate": 3.6311235574085473e-06, "loss": 2.969, "step": 2955000 }, { "epoch": 18.56, "learning_rate": 3.623274855269933e-06, "loss": 2.9742, "step": 2955500 }, { "epoch": 18.56, "learning_rate": 3.615426153131318e-06, "loss": 2.9695, "step": 2956000 }, { "epoch": 18.56, "learning_rate": 3.6075774509927037e-06, "loss": 2.9658, "step": 2956500 }, { "epoch": 18.57, "learning_rate": 3.59972874885409e-06, "loss": 2.9673, "step": 2957000 }, { "epoch": 18.57, "learning_rate": 3.5918957441197528e-06, "loss": 2.9658, "step": 2957500 }, { "epoch": 18.57, "learning_rate": 3.584047041981138e-06, "loss": 2.9639, "step": 2958000 }, { "epoch": 18.58, "learning_rate": 3.5761983398425235e-06, "loss": 2.9653, "step": 2958500 }, { "epoch": 18.58, "learning_rate": 3.568349637703909e-06, "loss": 2.9616, "step": 2959000 }, { "epoch": 18.58, "learning_rate": 3.560500935565295e-06, "loss": 2.9651, "step": 2959500 }, { "epoch": 18.59, "learning_rate": 3.5526836282352352e-06, "loss": 2.9665, "step": 2960000 }, { "epoch": 18.59, "learning_rate": 3.5448349260966204e-06, "loss": 2.9622, "step": 2960500 }, { "epoch": 18.59, "learning_rate": 3.536986223958007e-06, "loss": 2.964, "step": 2961000 }, { "epoch": 18.6, "learning_rate": 3.5291375218193924e-06, "loss": 2.973, "step": 2961500 }, { "epoch": 18.6, "learning_rate": 3.5212888196807776e-06, "loss": 2.9695, "step": 2962000 }, { "epoch": 18.6, "learning_rate": 3.5134558149464402e-06, "loss": 2.9685, "step": 2962500 }, { "epoch": 18.6, "learning_rate": 3.5056071128078267e-06, "loss": 2.9673, "step": 2963000 }, { "epoch": 18.61, "learning_rate": 3.4977584106692123e-06, "loss": 2.9668, "step": 2963500 }, { "epoch": 18.61, "learning_rate": 3.4899097085305974e-06, "loss": 2.9705, "step": 2964000 }, { "epoch": 18.61, "learning_rate": 3.482061006391983e-06, "loss": 2.9708, "step": 2964500 }, { "epoch": 18.62, "learning_rate": 3.4742123042533686e-06, "loss": 2.9718, "step": 2965000 }, { "epoch": 18.62, "learning_rate": 3.466379299519032e-06, "loss": 2.9716, "step": 2965500 }, { "epoch": 18.62, "learning_rate": 3.4585305973804172e-06, "loss": 2.9668, "step": 2966000 }, { "epoch": 18.63, "learning_rate": 3.450681895241803e-06, "loss": 2.9638, "step": 2966500 }, { "epoch": 18.63, "learning_rate": 3.4428331931031884e-06, "loss": 2.9639, "step": 2967000 }, { "epoch": 18.63, "learning_rate": 3.434984490964574e-06, "loss": 2.9668, "step": 2967500 }, { "epoch": 18.64, "learning_rate": 3.42713578882596e-06, "loss": 2.9642, "step": 2968000 }, { "epoch": 18.64, "learning_rate": 3.4192870866873456e-06, "loss": 2.9659, "step": 2968500 }, { "epoch": 18.64, "learning_rate": 3.411438384548731e-06, "loss": 2.9637, "step": 2969000 }, { "epoch": 18.65, "learning_rate": 3.403589682410117e-06, "loss": 2.966, "step": 2969500 }, { "epoch": 18.65, "learning_rate": 3.39575667767578e-06, "loss": 2.9692, "step": 2970000 }, { "epoch": 18.65, "eval_accuracy": 0.4628513812740962, "eval_loss": 2.8967418670654297, "eval_runtime": 1450.7823, "eval_samples_per_second": 57.763, "eval_steps_per_second": 5.777, "step": 2970000 }, { "epoch": 18.65, "learning_rate": 3.38793937034572e-06, "loss": 2.9699, "step": 2970500 }, { "epoch": 18.65, "learning_rate": 3.380090668207105e-06, "loss": 2.9705, "step": 2971000 }, { "epoch": 18.66, "learning_rate": 3.3722419660684907e-06, "loss": 2.9637, "step": 2971500 }, { "epoch": 18.66, "learning_rate": 3.364393263929877e-06, "loss": 2.9708, "step": 2972000 }, { "epoch": 18.66, "learning_rate": 3.3565445617912623e-06, "loss": 2.971, "step": 2972500 }, { "epoch": 18.67, "learning_rate": 3.348695859652648e-06, "loss": 2.9707, "step": 2973000 }, { "epoch": 18.67, "learning_rate": 3.3408471575140335e-06, "loss": 2.9676, "step": 2973500 }, { "epoch": 18.67, "learning_rate": 3.332998455375419e-06, "loss": 2.9573, "step": 2974000 }, { "epoch": 18.68, "learning_rate": 3.325149753236805e-06, "loss": 2.9643, "step": 2974500 }, { "epoch": 18.68, "learning_rate": 3.3173010510981907e-06, "loss": 2.9648, "step": 2975000 }, { "epoch": 18.68, "learning_rate": 3.3094523489595763e-06, "loss": 2.9648, "step": 2975500 }, { "epoch": 18.69, "learning_rate": 3.301603646820962e-06, "loss": 2.9637, "step": 2976000 }, { "epoch": 18.69, "learning_rate": 3.293754944682347e-06, "loss": 2.9702, "step": 2976500 }, { "epoch": 18.69, "learning_rate": 3.2859219399480105e-06, "loss": 2.9714, "step": 2977000 }, { "epoch": 18.7, "learning_rate": 3.278073237809396e-06, "loss": 2.9729, "step": 2977500 }, { "epoch": 18.7, "learning_rate": 3.2702245356707817e-06, "loss": 2.9694, "step": 2978000 }, { "epoch": 18.7, "learning_rate": 3.2623915309364443e-06, "loss": 2.9668, "step": 2978500 }, { "epoch": 18.71, "learning_rate": 3.2545428287978304e-06, "loss": 2.9694, "step": 2979000 }, { "epoch": 18.71, "learning_rate": 3.246694126659216e-06, "loss": 2.9637, "step": 2979500 }, { "epoch": 18.71, "learning_rate": 3.2388454245206015e-06, "loss": 2.9657, "step": 2980000 }, { "epoch": 18.71, "learning_rate": 3.2309967223819867e-06, "loss": 2.9607, "step": 2980500 }, { "epoch": 18.72, "learning_rate": 3.2231480202433723e-06, "loss": 2.9656, "step": 2981000 }, { "epoch": 18.72, "learning_rate": 3.2152993181047587e-06, "loss": 2.9655, "step": 2981500 }, { "epoch": 18.72, "learning_rate": 3.2074663133704214e-06, "loss": 2.9671, "step": 2982000 }, { "epoch": 18.73, "learning_rate": 3.1996176112318065e-06, "loss": 2.9637, "step": 2982500 }, { "epoch": 18.73, "learning_rate": 3.191768909093192e-06, "loss": 2.9576, "step": 2983000 }, { "epoch": 18.73, "learning_rate": 3.1839202069545785e-06, "loss": 2.9697, "step": 2983500 }, { "epoch": 18.74, "learning_rate": 3.176071504815964e-06, "loss": 2.9678, "step": 2984000 }, { "epoch": 18.74, "learning_rate": 3.1682228026773493e-06, "loss": 2.9628, "step": 2984500 }, { "epoch": 18.74, "learning_rate": 3.160374100538735e-06, "loss": 2.9656, "step": 2985000 }, { "epoch": 18.75, "learning_rate": 3.1525253984001205e-06, "loss": 2.966, "step": 2985500 }, { "epoch": 18.75, "learning_rate": 3.144692393665784e-06, "loss": 2.9687, "step": 2986000 }, { "epoch": 18.75, "learning_rate": 3.136843691527169e-06, "loss": 2.9715, "step": 2986500 }, { "epoch": 18.76, "learning_rate": 3.1289949893885547e-06, "loss": 2.9692, "step": 2987000 }, { "epoch": 18.76, "learning_rate": 3.1211462872499407e-06, "loss": 2.9667, "step": 2987500 }, { "epoch": 18.76, "learning_rate": 3.1133132825156034e-06, "loss": 2.9643, "step": 2988000 }, { "epoch": 18.76, "learning_rate": 3.105464580376989e-06, "loss": 2.9664, "step": 2988500 }, { "epoch": 18.77, "learning_rate": 3.0976158782383745e-06, "loss": 2.9741, "step": 2989000 }, { "epoch": 18.77, "learning_rate": 3.08976717609976e-06, "loss": 2.9671, "step": 2989500 }, { "epoch": 18.77, "learning_rate": 3.081918473961146e-06, "loss": 2.9605, "step": 2990000 }, { "epoch": 18.78, "learning_rate": 3.074085469226809e-06, "loss": 2.9674, "step": 2990500 }, { "epoch": 18.78, "learning_rate": 3.0662367670881944e-06, "loss": 2.9713, "step": 2991000 }, { "epoch": 18.78, "learning_rate": 3.05838806494958e-06, "loss": 2.9615, "step": 2991500 }, { "epoch": 18.79, "learning_rate": 3.050539362810966e-06, "loss": 2.9641, "step": 2992000 }, { "epoch": 18.79, "learning_rate": 3.042690660672351e-06, "loss": 2.9709, "step": 2992500 }, { "epoch": 18.79, "learning_rate": 3.034857655938014e-06, "loss": 2.9647, "step": 2993000 }, { "epoch": 18.8, "learning_rate": 3.0270089537994e-06, "loss": 2.965, "step": 2993500 }, { "epoch": 18.8, "learning_rate": 3.019160251660786e-06, "loss": 2.9705, "step": 2994000 }, { "epoch": 18.8, "learning_rate": 3.011311549522171e-06, "loss": 2.9668, "step": 2994500 }, { "epoch": 18.81, "learning_rate": 3.0034628473835566e-06, "loss": 2.9601, "step": 2995000 }, { "epoch": 18.81, "learning_rate": 2.9956141452449426e-06, "loss": 2.9666, "step": 2995500 }, { "epoch": 18.81, "learning_rate": 2.987765443106328e-06, "loss": 2.9692, "step": 2996000 }, { "epoch": 18.81, "learning_rate": 2.979932438371991e-06, "loss": 2.9652, "step": 2996500 }, { "epoch": 18.82, "learning_rate": 2.9720837362333764e-06, "loss": 2.9564, "step": 2997000 }, { "epoch": 18.82, "learning_rate": 2.9642507314990395e-06, "loss": 2.9612, "step": 2997500 }, { "epoch": 18.82, "learning_rate": 2.956402029360425e-06, "loss": 2.9595, "step": 2998000 }, { "epoch": 18.83, "learning_rate": 2.9485533272218106e-06, "loss": 2.9708, "step": 2998500 }, { "epoch": 18.83, "learning_rate": 2.9407046250831962e-06, "loss": 2.9672, "step": 2999000 }, { "epoch": 18.83, "learning_rate": 2.9328559229445822e-06, "loss": 2.9605, "step": 2999500 }, { "epoch": 18.84, "learning_rate": 2.925007220805968e-06, "loss": 2.9697, "step": 3000000 }, { "epoch": 18.84, "eval_accuracy": 0.4630144893718621, "eval_loss": 2.89540433883667, "eval_runtime": 1449.1859, "eval_samples_per_second": 57.826, "eval_steps_per_second": 5.783, "step": 3000000 }, { "epoch": 18.84, "learning_rate": 2.9171585186673534e-06, "loss": 2.9678, "step": 3000500 }, { "epoch": 18.84, "learning_rate": 2.909309816528739e-06, "loss": 2.9688, "step": 3001000 }, { "epoch": 18.85, "learning_rate": 2.9014611143901246e-06, "loss": 2.9691, "step": 3001500 }, { "epoch": 18.85, "learning_rate": 2.89361241225151e-06, "loss": 2.9641, "step": 3002000 }, { "epoch": 18.85, "learning_rate": 2.8857794075171732e-06, "loss": 2.9657, "step": 3002500 }, { "epoch": 18.86, "learning_rate": 2.877930705378559e-06, "loss": 2.9611, "step": 3003000 }, { "epoch": 18.86, "learning_rate": 2.8700820032399444e-06, "loss": 2.9694, "step": 3003500 }, { "epoch": 18.86, "learning_rate": 2.86223330110133e-06, "loss": 2.9713, "step": 3004000 }, { "epoch": 18.87, "learning_rate": 2.854400296366993e-06, "loss": 2.9664, "step": 3004500 }, { "epoch": 18.87, "learning_rate": 2.8465515942283782e-06, "loss": 2.9622, "step": 3005000 }, { "epoch": 18.87, "learning_rate": 2.8387185894940413e-06, "loss": 2.9657, "step": 3005500 }, { "epoch": 18.87, "learning_rate": 2.8308698873554273e-06, "loss": 2.9687, "step": 3006000 }, { "epoch": 18.88, "learning_rate": 2.823021185216813e-06, "loss": 2.9676, "step": 3006500 }, { "epoch": 18.88, "learning_rate": 2.815172483078198e-06, "loss": 2.9688, "step": 3007000 }, { "epoch": 18.88, "learning_rate": 2.807323780939584e-06, "loss": 2.967, "step": 3007500 }, { "epoch": 18.89, "learning_rate": 2.7994750788009697e-06, "loss": 2.9654, "step": 3008000 }, { "epoch": 18.89, "learning_rate": 2.7916263766623553e-06, "loss": 2.9693, "step": 3008500 }, { "epoch": 18.89, "learning_rate": 2.783777674523741e-06, "loss": 2.9649, "step": 3009000 }, { "epoch": 18.9, "learning_rate": 2.7759289723851264e-06, "loss": 2.9674, "step": 3009500 }, { "epoch": 18.9, "learning_rate": 2.7681116650550665e-06, "loss": 2.9666, "step": 3010000 }, { "epoch": 18.9, "learning_rate": 2.7602629629164526e-06, "loss": 2.9682, "step": 3010500 }, { "epoch": 18.91, "learning_rate": 2.7524142607778377e-06, "loss": 2.9677, "step": 3011000 }, { "epoch": 18.91, "learning_rate": 2.7445655586392233e-06, "loss": 2.9685, "step": 3011500 }, { "epoch": 18.91, "learning_rate": 2.7367168565006093e-06, "loss": 2.958, "step": 3012000 }, { "epoch": 18.92, "learning_rate": 2.728868154361995e-06, "loss": 2.9632, "step": 3012500 }, { "epoch": 18.92, "learning_rate": 2.7210194522233805e-06, "loss": 2.9712, "step": 3013000 }, { "epoch": 18.92, "learning_rate": 2.713170750084766e-06, "loss": 2.9668, "step": 3013500 }, { "epoch": 18.92, "learning_rate": 2.7053220479461517e-06, "loss": 2.9711, "step": 3014000 }, { "epoch": 18.93, "learning_rate": 2.6974733458075377e-06, "loss": 2.9666, "step": 3014500 }, { "epoch": 18.93, "learning_rate": 2.6896403410732e-06, "loss": 2.9647, "step": 3015000 }, { "epoch": 18.93, "learning_rate": 2.681791638934586e-06, "loss": 2.9703, "step": 3015500 }, { "epoch": 18.94, "learning_rate": 2.6739429367959715e-06, "loss": 2.9703, "step": 3016000 }, { "epoch": 18.94, "learning_rate": 2.6660942346573575e-06, "loss": 2.9612, "step": 3016500 }, { "epoch": 18.94, "learning_rate": 2.6582455325187427e-06, "loss": 2.9568, "step": 3017000 }, { "epoch": 18.95, "learning_rate": 2.6503968303801283e-06, "loss": 2.9614, "step": 3017500 }, { "epoch": 18.95, "learning_rate": 2.6425638256457913e-06, "loss": 2.9669, "step": 3018000 }, { "epoch": 18.95, "learning_rate": 2.634715123507177e-06, "loss": 2.9628, "step": 3018500 }, { "epoch": 18.96, "learning_rate": 2.6268664213685625e-06, "loss": 2.959, "step": 3019000 }, { "epoch": 18.96, "learning_rate": 2.619017719229948e-06, "loss": 2.9682, "step": 3019500 }, { "epoch": 18.96, "learning_rate": 2.611169017091334e-06, "loss": 2.9654, "step": 3020000 }, { "epoch": 18.97, "learning_rate": 2.6033203149527197e-06, "loss": 2.9725, "step": 3020500 }, { "epoch": 18.97, "learning_rate": 2.595471612814105e-06, "loss": 2.9659, "step": 3021000 }, { "epoch": 18.97, "learning_rate": 2.587622910675491e-06, "loss": 2.9641, "step": 3021500 }, { "epoch": 18.98, "learning_rate": 2.579805603345431e-06, "loss": 2.9661, "step": 3022000 }, { "epoch": 18.98, "learning_rate": 2.5719569012068166e-06, "loss": 2.9753, "step": 3022500 }, { "epoch": 18.98, "learning_rate": 2.564108199068202e-06, "loss": 2.9652, "step": 3023000 }, { "epoch": 18.98, "learning_rate": 2.5562594969295878e-06, "loss": 2.9578, "step": 3023500 }, { "epoch": 18.99, "learning_rate": 2.5484107947909734e-06, "loss": 2.9597, "step": 3024000 }, { "epoch": 18.99, "learning_rate": 2.5405777900566364e-06, "loss": 2.9685, "step": 3024500 }, { "epoch": 18.99, "learning_rate": 2.532729087918022e-06, "loss": 2.9651, "step": 3025000 }, { "epoch": 19.0, "learning_rate": 2.5248803857794076e-06, "loss": 2.9618, "step": 3025500 }, { "epoch": 19.0, "learning_rate": 2.517031683640793e-06, "loss": 2.9621, "step": 3026000 }, { "epoch": 19.0, "learning_rate": 2.509182981502179e-06, "loss": 2.9544, "step": 3026500 }, { "epoch": 19.01, "learning_rate": 2.5013342793635644e-06, "loss": 2.9622, "step": 3027000 }, { "epoch": 19.01, "learning_rate": 2.4935169720335045e-06, "loss": 2.954, "step": 3027500 }, { "epoch": 19.01, "learning_rate": 2.48566826989489e-06, "loss": 2.9605, "step": 3028000 }, { "epoch": 19.02, "learning_rate": 2.477819567756276e-06, "loss": 2.9655, "step": 3028500 }, { "epoch": 19.02, "learning_rate": 2.4699708656176617e-06, "loss": 2.9516, "step": 3029000 }, { "epoch": 19.02, "learning_rate": 2.4621221634790473e-06, "loss": 2.9554, "step": 3029500 }, { "epoch": 19.03, "learning_rate": 2.454273461340433e-06, "loss": 2.9611, "step": 3030000 }, { "epoch": 19.03, "eval_accuracy": 0.46318085193285313, "eval_loss": 2.894441843032837, "eval_runtime": 1449.1797, "eval_samples_per_second": 57.827, "eval_steps_per_second": 5.783, "step": 3030000 }, { "epoch": 19.03, "learning_rate": 2.4464247592018184e-06, "loss": 2.9524, "step": 3030500 }, { "epoch": 19.03, "learning_rate": 2.438576057063204e-06, "loss": 2.951, "step": 3031000 }, { "epoch": 19.03, "learning_rate": 2.4307273549245896e-06, "loss": 2.9588, "step": 3031500 }, { "epoch": 19.04, "learning_rate": 2.4228943501902527e-06, "loss": 2.9557, "step": 3032000 }, { "epoch": 19.04, "learning_rate": 2.4150613454559157e-06, "loss": 2.9564, "step": 3032500 }, { "epoch": 19.04, "learning_rate": 2.4072126433173013e-06, "loss": 2.9551, "step": 3033000 }, { "epoch": 19.05, "learning_rate": 2.3993639411786865e-06, "loss": 2.9598, "step": 3033500 }, { "epoch": 19.05, "learning_rate": 2.3915152390400725e-06, "loss": 2.9551, "step": 3034000 }, { "epoch": 19.05, "learning_rate": 2.383666536901458e-06, "loss": 2.9532, "step": 3034500 }, { "epoch": 19.06, "learning_rate": 2.375817834762844e-06, "loss": 2.9607, "step": 3035000 }, { "epoch": 19.06, "learning_rate": 2.3679691326242293e-06, "loss": 2.956, "step": 3035500 }, { "epoch": 19.06, "learning_rate": 2.360120430485615e-06, "loss": 2.9638, "step": 3036000 }, { "epoch": 19.07, "learning_rate": 2.352271728347001e-06, "loss": 2.9531, "step": 3036500 }, { "epoch": 19.07, "learning_rate": 2.3444230262083865e-06, "loss": 2.9633, "step": 3037000 }, { "epoch": 19.07, "learning_rate": 2.336590021474049e-06, "loss": 2.9622, "step": 3037500 }, { "epoch": 19.08, "learning_rate": 2.3287413193354347e-06, "loss": 2.9598, "step": 3038000 }, { "epoch": 19.08, "learning_rate": 2.3208926171968207e-06, "loss": 2.9618, "step": 3038500 }, { "epoch": 19.08, "learning_rate": 2.3130439150582063e-06, "loss": 2.9574, "step": 3039000 }, { "epoch": 19.08, "learning_rate": 2.3051952129195914e-06, "loss": 2.9565, "step": 3039500 }, { "epoch": 19.09, "learning_rate": 2.2973465107809775e-06, "loss": 2.9669, "step": 3040000 }, { "epoch": 19.09, "learning_rate": 2.289497808642363e-06, "loss": 2.958, "step": 3040500 }, { "epoch": 19.09, "learning_rate": 2.2816491065037486e-06, "loss": 2.9563, "step": 3041000 }, { "epoch": 19.1, "learning_rate": 2.2738161017694113e-06, "loss": 2.9575, "step": 3041500 }, { "epoch": 19.1, "learning_rate": 2.2659673996307973e-06, "loss": 2.962, "step": 3042000 }, { "epoch": 19.1, "learning_rate": 2.25813439489646e-06, "loss": 2.9557, "step": 3042500 }, { "epoch": 19.11, "learning_rate": 2.250285692757846e-06, "loss": 2.959, "step": 3043000 }, { "epoch": 19.11, "learning_rate": 2.242436990619231e-06, "loss": 2.9573, "step": 3043500 }, { "epoch": 19.11, "learning_rate": 2.2345882884806167e-06, "loss": 2.962, "step": 3044000 }, { "epoch": 19.12, "learning_rate": 2.2267395863420027e-06, "loss": 2.9545, "step": 3044500 }, { "epoch": 19.12, "learning_rate": 2.2189065816076658e-06, "loss": 2.9578, "step": 3045000 }, { "epoch": 19.12, "learning_rate": 2.211057879469051e-06, "loss": 2.9695, "step": 3045500 }, { "epoch": 19.13, "learning_rate": 2.2032091773304365e-06, "loss": 2.9611, "step": 3046000 }, { "epoch": 19.13, "learning_rate": 2.1953604751918225e-06, "loss": 2.9553, "step": 3046500 }, { "epoch": 19.13, "learning_rate": 2.187511773053208e-06, "loss": 2.9584, "step": 3047000 }, { "epoch": 19.14, "learning_rate": 2.1796787683188708e-06, "loss": 2.9556, "step": 3047500 }, { "epoch": 19.14, "learning_rate": 2.1718300661802564e-06, "loss": 2.9545, "step": 3048000 }, { "epoch": 19.14, "learning_rate": 2.1639813640416424e-06, "loss": 2.9626, "step": 3048500 }, { "epoch": 19.14, "learning_rate": 2.156132661903028e-06, "loss": 2.957, "step": 3049000 }, { "epoch": 19.15, "learning_rate": 2.148283959764413e-06, "loss": 2.9568, "step": 3049500 }, { "epoch": 19.15, "learning_rate": 2.140435257625799e-06, "loss": 2.96, "step": 3050000 }, { "epoch": 19.15, "learning_rate": 2.1325865554871847e-06, "loss": 2.9603, "step": 3050500 }, { "epoch": 19.16, "learning_rate": 2.1247378533485707e-06, "loss": 2.9593, "step": 3051000 }, { "epoch": 19.16, "learning_rate": 2.116889151209956e-06, "loss": 2.9599, "step": 3051500 }, { "epoch": 19.16, "learning_rate": 2.109056146475619e-06, "loss": 2.9579, "step": 3052000 }, { "epoch": 19.17, "learning_rate": 2.1012074443370046e-06, "loss": 2.9555, "step": 3052500 }, { "epoch": 19.17, "learning_rate": 2.09335874219839e-06, "loss": 2.9557, "step": 3053000 }, { "epoch": 19.17, "learning_rate": 2.085525737464053e-06, "loss": 2.9615, "step": 3053500 }, { "epoch": 19.18, "learning_rate": 2.077677035325439e-06, "loss": 2.9565, "step": 3054000 }, { "epoch": 19.18, "learning_rate": 2.0698283331868244e-06, "loss": 2.9551, "step": 3054500 }, { "epoch": 19.18, "learning_rate": 2.06197963104821e-06, "loss": 2.9545, "step": 3055000 }, { "epoch": 19.19, "learning_rate": 2.0541309289095956e-06, "loss": 2.9639, "step": 3055500 }, { "epoch": 19.19, "learning_rate": 2.046282226770981e-06, "loss": 2.9576, "step": 3056000 }, { "epoch": 19.19, "learning_rate": 2.0384335246323667e-06, "loss": 2.9614, "step": 3056500 }, { "epoch": 19.19, "learning_rate": 2.0305848224937527e-06, "loss": 2.954, "step": 3057000 }, { "epoch": 19.2, "learning_rate": 2.022736120355138e-06, "loss": 2.9567, "step": 3057500 }, { "epoch": 19.2, "learning_rate": 2.014903115620801e-06, "loss": 2.9605, "step": 3058000 }, { "epoch": 19.2, "learning_rate": 2.007070110886464e-06, "loss": 2.9608, "step": 3058500 }, { "epoch": 19.21, "learning_rate": 1.9992214087478496e-06, "loss": 2.955, "step": 3059000 }, { "epoch": 19.21, "learning_rate": 1.9913727066092352e-06, "loss": 2.9516, "step": 3059500 }, { "epoch": 19.21, "learning_rate": 1.983524004470621e-06, "loss": 2.9573, "step": 3060000 }, { "epoch": 19.21, "eval_accuracy": 0.46331422660136884, "eval_loss": 2.8938393592834473, "eval_runtime": 1451.1195, "eval_samples_per_second": 57.749, "eval_steps_per_second": 5.776, "step": 3060000 }, { "epoch": 19.22, "learning_rate": 1.9756753023320064e-06, "loss": 2.9568, "step": 3060500 }, { "epoch": 19.22, "learning_rate": 1.9678266001933924e-06, "loss": 2.9595, "step": 3061000 }, { "epoch": 19.22, "learning_rate": 1.9599778980547776e-06, "loss": 2.958, "step": 3061500 }, { "epoch": 19.23, "learning_rate": 1.952129195916163e-06, "loss": 2.9554, "step": 3062000 }, { "epoch": 19.23, "learning_rate": 1.944280493777549e-06, "loss": 2.9585, "step": 3062500 }, { "epoch": 19.23, "learning_rate": 1.9364317916389348e-06, "loss": 2.9596, "step": 3063000 }, { "epoch": 19.24, "learning_rate": 1.9285987869045974e-06, "loss": 2.9549, "step": 3063500 }, { "epoch": 19.24, "learning_rate": 1.920750084765983e-06, "loss": 2.9581, "step": 3064000 }, { "epoch": 19.24, "learning_rate": 1.912901382627369e-06, "loss": 2.9546, "step": 3064500 }, { "epoch": 19.25, "learning_rate": 1.9050526804887544e-06, "loss": 2.9559, "step": 3065000 }, { "epoch": 19.25, "learning_rate": 1.89720397835014e-06, "loss": 2.96, "step": 3065500 }, { "epoch": 19.25, "learning_rate": 1.8893552762115258e-06, "loss": 2.9587, "step": 3066000 }, { "epoch": 19.25, "learning_rate": 1.8815065740729114e-06, "loss": 2.9509, "step": 3066500 }, { "epoch": 19.26, "learning_rate": 1.8736735693385742e-06, "loss": 2.9532, "step": 3067000 }, { "epoch": 19.26, "learning_rate": 1.8658248671999598e-06, "loss": 2.952, "step": 3067500 }, { "epoch": 19.26, "learning_rate": 1.8579761650613456e-06, "loss": 2.9649, "step": 3068000 }, { "epoch": 19.27, "learning_rate": 1.8501274629227312e-06, "loss": 2.9529, "step": 3068500 }, { "epoch": 19.27, "learning_rate": 1.8422787607841166e-06, "loss": 2.9565, "step": 3069000 }, { "epoch": 19.27, "learning_rate": 1.8344457560497796e-06, "loss": 2.9605, "step": 3069500 }, { "epoch": 19.28, "learning_rate": 1.8265970539111654e-06, "loss": 2.955, "step": 3070000 }, { "epoch": 19.28, "learning_rate": 1.818748351772551e-06, "loss": 2.9623, "step": 3070500 }, { "epoch": 19.28, "learning_rate": 1.8108996496339366e-06, "loss": 2.9559, "step": 3071000 }, { "epoch": 19.29, "learning_rate": 1.8030509474953224e-06, "loss": 2.9552, "step": 3071500 }, { "epoch": 19.29, "learning_rate": 1.7952022453567078e-06, "loss": 2.9602, "step": 3072000 }, { "epoch": 19.29, "learning_rate": 1.7873535432180938e-06, "loss": 2.9631, "step": 3072500 }, { "epoch": 19.3, "learning_rate": 1.7795205384837564e-06, "loss": 2.9602, "step": 3073000 }, { "epoch": 19.3, "learning_rate": 1.7716718363451422e-06, "loss": 2.9585, "step": 3073500 }, { "epoch": 19.3, "learning_rate": 1.7638231342065276e-06, "loss": 2.9553, "step": 3074000 }, { "epoch": 19.3, "learning_rate": 1.7559744320679132e-06, "loss": 2.955, "step": 3074500 }, { "epoch": 19.31, "learning_rate": 1.748125729929299e-06, "loss": 2.9592, "step": 3075000 }, { "epoch": 19.31, "learning_rate": 1.7402927251949616e-06, "loss": 2.9584, "step": 3075500 }, { "epoch": 19.31, "learning_rate": 1.7324440230563474e-06, "loss": 2.9566, "step": 3076000 }, { "epoch": 19.32, "learning_rate": 1.724595320917733e-06, "loss": 2.9579, "step": 3076500 }, { "epoch": 19.32, "learning_rate": 1.7167466187791188e-06, "loss": 2.9599, "step": 3077000 }, { "epoch": 19.32, "learning_rate": 1.7089136140447815e-06, "loss": 2.9594, "step": 3077500 }, { "epoch": 19.33, "learning_rate": 1.7010806093104445e-06, "loss": 2.9636, "step": 3078000 }, { "epoch": 19.33, "learning_rate": 1.69323190717183e-06, "loss": 2.9575, "step": 3078500 }, { "epoch": 19.33, "learning_rate": 1.685383205033216e-06, "loss": 2.965, "step": 3079000 }, { "epoch": 19.34, "learning_rate": 1.6775345028946013e-06, "loss": 2.9662, "step": 3079500 }, { "epoch": 19.34, "learning_rate": 1.6696858007559873e-06, "loss": 2.9661, "step": 3080000 }, { "epoch": 19.34, "learning_rate": 1.6618370986173727e-06, "loss": 2.96, "step": 3080500 }, { "epoch": 19.35, "learning_rate": 1.6539883964787583e-06, "loss": 2.9579, "step": 3081000 }, { "epoch": 19.35, "learning_rate": 1.646139694340144e-06, "loss": 2.9581, "step": 3081500 }, { "epoch": 19.35, "learning_rate": 1.6383066896058067e-06, "loss": 2.9581, "step": 3082000 }, { "epoch": 19.35, "learning_rate": 1.6304736848714698e-06, "loss": 2.9599, "step": 3082500 }, { "epoch": 19.36, "learning_rate": 1.6226249827328556e-06, "loss": 2.9567, "step": 3083000 }, { "epoch": 19.36, "learning_rate": 1.614776280594241e-06, "loss": 2.9599, "step": 3083500 }, { "epoch": 19.36, "learning_rate": 1.606943275859904e-06, "loss": 2.9564, "step": 3084000 }, { "epoch": 19.37, "learning_rate": 1.5990945737212896e-06, "loss": 2.9647, "step": 3084500 }, { "epoch": 19.37, "learning_rate": 1.591245871582675e-06, "loss": 2.9591, "step": 3085000 }, { "epoch": 19.37, "learning_rate": 1.5833971694440608e-06, "loss": 2.9545, "step": 3085500 }, { "epoch": 19.38, "learning_rate": 1.5755484673054464e-06, "loss": 2.9581, "step": 3086000 }, { "epoch": 19.38, "learning_rate": 1.5676997651668322e-06, "loss": 2.9585, "step": 3086500 }, { "epoch": 19.38, "learning_rate": 1.5598510630282178e-06, "loss": 2.9563, "step": 3087000 }, { "epoch": 19.39, "learning_rate": 1.5520023608896034e-06, "loss": 2.9605, "step": 3087500 }, { "epoch": 19.39, "learning_rate": 1.5441536587509892e-06, "loss": 2.9674, "step": 3088000 }, { "epoch": 19.39, "learning_rate": 1.5363049566123745e-06, "loss": 2.9623, "step": 3088500 }, { "epoch": 19.4, "learning_rate": 1.5284562544737603e-06, "loss": 2.9589, "step": 3089000 }, { "epoch": 19.4, "learning_rate": 1.520607552335146e-06, "loss": 2.9553, "step": 3089500 }, { "epoch": 19.4, "learning_rate": 1.5127588501965317e-06, "loss": 2.9568, "step": 3090000 }, { "epoch": 19.4, "eval_accuracy": 0.463482257220572, "eval_loss": 2.892745018005371, "eval_runtime": 1446.5226, "eval_samples_per_second": 57.933, "eval_steps_per_second": 5.794, "step": 3090000 }, { "epoch": 19.41, "learning_rate": 1.504910148057917e-06, "loss": 2.9568, "step": 3090500 }, { "epoch": 19.41, "learning_rate": 1.497061445919303e-06, "loss": 2.966, "step": 3091000 }, { "epoch": 19.41, "learning_rate": 1.4892127437806885e-06, "loss": 2.9594, "step": 3091500 }, { "epoch": 19.41, "learning_rate": 1.481364041642074e-06, "loss": 2.9563, "step": 3092000 }, { "epoch": 19.42, "learning_rate": 1.473531036907737e-06, "loss": 2.9579, "step": 3092500 }, { "epoch": 19.42, "learning_rate": 1.4656823347691227e-06, "loss": 2.9604, "step": 3093000 }, { "epoch": 19.42, "learning_rate": 1.4578336326305083e-06, "loss": 2.9643, "step": 3093500 }, { "epoch": 19.43, "learning_rate": 1.449984930491894e-06, "loss": 2.9658, "step": 3094000 }, { "epoch": 19.43, "learning_rate": 1.4421362283532795e-06, "loss": 2.9585, "step": 3094500 }, { "epoch": 19.43, "learning_rate": 1.4342875262146653e-06, "loss": 2.9557, "step": 3095000 }, { "epoch": 19.44, "learning_rate": 1.426454521480328e-06, "loss": 2.955, "step": 3095500 }, { "epoch": 19.44, "learning_rate": 1.4186058193417137e-06, "loss": 2.9579, "step": 3096000 }, { "epoch": 19.44, "learning_rate": 1.4107571172030993e-06, "loss": 2.9596, "step": 3096500 }, { "epoch": 19.45, "learning_rate": 1.4029084150644851e-06, "loss": 2.9625, "step": 3097000 }, { "epoch": 19.45, "learning_rate": 1.3950597129258707e-06, "loss": 2.9552, "step": 3097500 }, { "epoch": 19.45, "learning_rate": 1.3872110107872563e-06, "loss": 2.965, "step": 3098000 }, { "epoch": 19.46, "learning_rate": 1.3793623086486419e-06, "loss": 2.9596, "step": 3098500 }, { "epoch": 19.46, "learning_rate": 1.371545001318582e-06, "loss": 2.9603, "step": 3099000 }, { "epoch": 19.46, "learning_rate": 1.3636962991799676e-06, "loss": 2.9593, "step": 3099500 }, { "epoch": 19.46, "learning_rate": 1.3558475970413534e-06, "loss": 2.9575, "step": 3100000 }, { "epoch": 19.47, "learning_rate": 1.3479988949027388e-06, "loss": 2.9622, "step": 3100500 }, { "epoch": 19.47, "learning_rate": 1.3401501927641246e-06, "loss": 2.9556, "step": 3101000 }, { "epoch": 19.47, "learning_rate": 1.3323014906255102e-06, "loss": 2.9645, "step": 3101500 }, { "epoch": 19.48, "learning_rate": 1.324468485891173e-06, "loss": 2.9669, "step": 3102000 }, { "epoch": 19.48, "learning_rate": 1.3166197837525586e-06, "loss": 2.9568, "step": 3102500 }, { "epoch": 19.48, "learning_rate": 1.3087710816139444e-06, "loss": 2.9591, "step": 3103000 }, { "epoch": 19.49, "learning_rate": 1.30092237947533e-06, "loss": 2.9537, "step": 3103500 }, { "epoch": 19.49, "learning_rate": 1.2930736773367158e-06, "loss": 2.9624, "step": 3104000 }, { "epoch": 19.49, "learning_rate": 1.2852249751981012e-06, "loss": 2.9607, "step": 3104500 }, { "epoch": 19.5, "learning_rate": 1.277376273059487e-06, "loss": 2.9674, "step": 3105000 }, { "epoch": 19.5, "learning_rate": 1.2695275709208726e-06, "loss": 2.9603, "step": 3105500 }, { "epoch": 19.5, "learning_rate": 1.2616788687822584e-06, "loss": 2.9613, "step": 3106000 }, { "epoch": 19.51, "learning_rate": 1.253845864047921e-06, "loss": 2.9597, "step": 3106500 }, { "epoch": 19.51, "learning_rate": 1.2459971619093068e-06, "loss": 2.9567, "step": 3107000 }, { "epoch": 19.51, "learning_rate": 1.2381484597706924e-06, "loss": 2.9643, "step": 3107500 }, { "epoch": 19.52, "learning_rate": 1.2303154550363552e-06, "loss": 2.9561, "step": 3108000 }, { "epoch": 19.52, "learning_rate": 1.2224667528977408e-06, "loss": 2.9554, "step": 3108500 }, { "epoch": 19.52, "learning_rate": 1.2146180507591266e-06, "loss": 2.96, "step": 3109000 }, { "epoch": 19.52, "learning_rate": 1.206769348620512e-06, "loss": 2.9645, "step": 3109500 }, { "epoch": 19.53, "learning_rate": 1.1989206464818978e-06, "loss": 2.9646, "step": 3110000 }, { "epoch": 19.53, "learning_rate": 1.1910876417475607e-06, "loss": 2.9506, "step": 3110500 }, { "epoch": 19.53, "learning_rate": 1.1832389396089462e-06, "loss": 2.9577, "step": 3111000 }, { "epoch": 19.54, "learning_rate": 1.175390237470332e-06, "loss": 2.9528, "step": 3111500 }, { "epoch": 19.54, "learning_rate": 1.1675415353317176e-06, "loss": 2.9583, "step": 3112000 }, { "epoch": 19.54, "learning_rate": 1.1597085305973805e-06, "loss": 2.9631, "step": 3112500 }, { "epoch": 19.55, "learning_rate": 1.151859828458766e-06, "loss": 2.9615, "step": 3113000 }, { "epoch": 19.55, "learning_rate": 1.1440111263201519e-06, "loss": 2.9586, "step": 3113500 }, { "epoch": 19.55, "learning_rate": 1.1361624241815375e-06, "loss": 2.96, "step": 3114000 }, { "epoch": 19.56, "learning_rate": 1.128313722042923e-06, "loss": 2.953, "step": 3114500 }, { "epoch": 19.56, "learning_rate": 1.1204650199043086e-06, "loss": 2.9614, "step": 3115000 }, { "epoch": 19.56, "learning_rate": 1.1126163177656942e-06, "loss": 2.9618, "step": 3115500 }, { "epoch": 19.57, "learning_rate": 1.10476761562708e-06, "loss": 2.9598, "step": 3116000 }, { "epoch": 19.57, "learning_rate": 1.0969189134884656e-06, "loss": 2.9589, "step": 3116500 }, { "epoch": 19.57, "learning_rate": 1.0891016061584057e-06, "loss": 2.9535, "step": 3117000 }, { "epoch": 19.57, "learning_rate": 1.0812529040197913e-06, "loss": 2.9535, "step": 3117500 }, { "epoch": 19.58, "learning_rate": 1.073404201881177e-06, "loss": 2.9628, "step": 3118000 }, { "epoch": 19.58, "learning_rate": 1.0655554997425627e-06, "loss": 2.9578, "step": 3118500 }, { "epoch": 19.58, "learning_rate": 1.0577067976039483e-06, "loss": 2.961, "step": 3119000 }, { "epoch": 19.59, "learning_rate": 1.0498580954653339e-06, "loss": 2.9623, "step": 3119500 }, { "epoch": 19.59, "learning_rate": 1.0420093933267195e-06, "loss": 2.9583, "step": 3120000 }, { "epoch": 19.59, "eval_accuracy": 0.46357893460842037, "eval_loss": 2.892308235168457, "eval_runtime": 1446.7581, "eval_samples_per_second": 57.923, "eval_steps_per_second": 5.793, "step": 3120000 }, { "epoch": 19.59, "learning_rate": 1.0341606911881053e-06, "loss": 2.9599, "step": 3120500 }, { "epoch": 19.6, "learning_rate": 1.0263119890494909e-06, "loss": 2.9579, "step": 3121000 }, { "epoch": 19.6, "learning_rate": 1.0184632869108765e-06, "loss": 2.9572, "step": 3121500 }, { "epoch": 19.6, "learning_rate": 1.010614584772262e-06, "loss": 2.9602, "step": 3122000 }, { "epoch": 19.61, "learning_rate": 1.0027658826336476e-06, "loss": 2.954, "step": 3122500 }, { "epoch": 19.61, "learning_rate": 9.949328778993107e-07, "loss": 2.9581, "step": 3123000 }, { "epoch": 19.61, "learning_rate": 9.870998731649735e-07, "loss": 2.9591, "step": 3123500 }, { "epoch": 19.62, "learning_rate": 9.792511710263591e-07, "loss": 2.9579, "step": 3124000 }, { "epoch": 19.62, "learning_rate": 9.71402468887745e-07, "loss": 2.9583, "step": 3124500 }, { "epoch": 19.62, "learning_rate": 9.635537667491303e-07, "loss": 2.9594, "step": 3125000 }, { "epoch": 19.62, "learning_rate": 9.557050646105161e-07, "loss": 2.9666, "step": 3125500 }, { "epoch": 19.63, "learning_rate": 9.478563624719017e-07, "loss": 2.9545, "step": 3126000 }, { "epoch": 19.63, "learning_rate": 9.400076603332874e-07, "loss": 2.9569, "step": 3126500 }, { "epoch": 19.63, "learning_rate": 9.321589581946729e-07, "loss": 2.9573, "step": 3127000 }, { "epoch": 19.64, "learning_rate": 9.243259534603358e-07, "loss": 2.9544, "step": 3127500 }, { "epoch": 19.64, "learning_rate": 9.164772513217215e-07, "loss": 2.96, "step": 3128000 }, { "epoch": 19.64, "learning_rate": 9.08628549183107e-07, "loss": 2.9529, "step": 3128500 }, { "epoch": 19.65, "learning_rate": 9.007798470444927e-07, "loss": 2.9563, "step": 3129000 }, { "epoch": 19.65, "learning_rate": 8.929625397144328e-07, "loss": 2.9562, "step": 3129500 }, { "epoch": 19.65, "learning_rate": 8.851138375758185e-07, "loss": 2.9564, "step": 3130000 }, { "epoch": 19.66, "learning_rate": 8.772651354372042e-07, "loss": 2.9566, "step": 3130500 }, { "epoch": 19.66, "learning_rate": 8.69432130702867e-07, "loss": 2.9552, "step": 3131000 }, { "epoch": 19.66, "learning_rate": 8.615834285642526e-07, "loss": 2.9622, "step": 3131500 }, { "epoch": 19.67, "learning_rate": 8.537347264256383e-07, "loss": 2.9623, "step": 3132000 }, { "epoch": 19.67, "learning_rate": 8.458860242870238e-07, "loss": 2.9568, "step": 3132500 }, { "epoch": 19.67, "learning_rate": 8.380373221484095e-07, "loss": 2.9604, "step": 3133000 }, { "epoch": 19.68, "learning_rate": 8.301886200097952e-07, "loss": 2.9601, "step": 3133500 }, { "epoch": 19.68, "learning_rate": 8.223399178711809e-07, "loss": 2.9583, "step": 3134000 }, { "epoch": 19.68, "learning_rate": 8.144912157325666e-07, "loss": 2.9497, "step": 3134500 }, { "epoch": 19.68, "learning_rate": 8.066425135939521e-07, "loss": 2.9575, "step": 3135000 }, { "epoch": 19.69, "learning_rate": 7.98809508859615e-07, "loss": 2.9595, "step": 3135500 }, { "epoch": 19.69, "learning_rate": 7.909765041252779e-07, "loss": 2.9516, "step": 3136000 }, { "epoch": 19.69, "learning_rate": 7.831278019866635e-07, "loss": 2.957, "step": 3136500 }, { "epoch": 19.7, "learning_rate": 7.752790998480491e-07, "loss": 2.9551, "step": 3137000 }, { "epoch": 19.7, "learning_rate": 7.674303977094348e-07, "loss": 2.9582, "step": 3137500 }, { "epoch": 19.7, "learning_rate": 7.595816955708205e-07, "loss": 2.9594, "step": 3138000 }, { "epoch": 19.71, "learning_rate": 7.51732993432206e-07, "loss": 2.961, "step": 3138500 }, { "epoch": 19.71, "learning_rate": 7.438842912935917e-07, "loss": 2.9594, "step": 3139000 }, { "epoch": 19.71, "learning_rate": 7.360355891549773e-07, "loss": 2.9629, "step": 3139500 }, { "epoch": 19.72, "learning_rate": 7.282025844206402e-07, "loss": 2.959, "step": 3140000 }, { "epoch": 19.72, "learning_rate": 7.203538822820259e-07, "loss": 2.9599, "step": 3140500 }, { "epoch": 19.72, "learning_rate": 7.125051801434115e-07, "loss": 2.9596, "step": 3141000 }, { "epoch": 19.73, "learning_rate": 7.046564780047972e-07, "loss": 2.9607, "step": 3141500 }, { "epoch": 19.73, "learning_rate": 6.968077758661828e-07, "loss": 2.9578, "step": 3142000 }, { "epoch": 19.73, "learning_rate": 6.889590737275684e-07, "loss": 2.9537, "step": 3142500 }, { "epoch": 19.73, "learning_rate": 6.81110371588954e-07, "loss": 2.9666, "step": 3143000 }, { "epoch": 19.74, "learning_rate": 6.732616694503397e-07, "loss": 2.9577, "step": 3143500 }, { "epoch": 19.74, "learning_rate": 6.654286647160026e-07, "loss": 2.9557, "step": 3144000 }, { "epoch": 19.74, "learning_rate": 6.575799625773882e-07, "loss": 2.9639, "step": 3144500 }, { "epoch": 19.75, "learning_rate": 6.497312604387739e-07, "loss": 2.9584, "step": 3145000 }, { "epoch": 19.75, "learning_rate": 6.418982557044367e-07, "loss": 2.9586, "step": 3145500 }, { "epoch": 19.75, "learning_rate": 6.340495535658224e-07, "loss": 2.957, "step": 3146000 }, { "epoch": 19.76, "learning_rate": 6.26200851427208e-07, "loss": 2.9627, "step": 3146500 }, { "epoch": 19.76, "learning_rate": 6.183521492885936e-07, "loss": 2.9605, "step": 3147000 }, { "epoch": 19.76, "learning_rate": 6.105034471499793e-07, "loss": 2.9544, "step": 3147500 }, { "epoch": 19.77, "learning_rate": 6.02654745011365e-07, "loss": 2.9541, "step": 3148000 }, { "epoch": 19.77, "learning_rate": 5.948060428727506e-07, "loss": 2.9602, "step": 3148500 }, { "epoch": 19.77, "learning_rate": 5.869730381384134e-07, "loss": 2.9566, "step": 3149000 }, { "epoch": 19.78, "learning_rate": 5.791243359997991e-07, "loss": 2.9568, "step": 3149500 }, { "epoch": 19.78, "learning_rate": 5.712756338611847e-07, "loss": 2.959, "step": 3150000 }, { "epoch": 19.78, "eval_accuracy": 0.46370356071987934, "eval_loss": 2.8914310932159424, "eval_runtime": 1446.7255, "eval_samples_per_second": 57.925, "eval_steps_per_second": 5.793, "step": 3150000 }, { "epoch": 19.78, "learning_rate": 5.634269317225704e-07, "loss": 2.9621, "step": 3150500 }, { "epoch": 19.79, "learning_rate": 5.55578229583956e-07, "loss": 2.9652, "step": 3151000 }, { "epoch": 19.79, "learning_rate": 5.477452248496189e-07, "loss": 2.9565, "step": 3151500 }, { "epoch": 19.79, "learning_rate": 5.398965227110045e-07, "loss": 2.9616, "step": 3152000 }, { "epoch": 19.79, "learning_rate": 5.320478205723901e-07, "loss": 2.9616, "step": 3152500 }, { "epoch": 19.8, "learning_rate": 5.241991184337758e-07, "loss": 2.9508, "step": 3153000 }, { "epoch": 19.8, "learning_rate": 5.163504162951614e-07, "loss": 2.9524, "step": 3153500 }, { "epoch": 19.8, "learning_rate": 5.085017141565471e-07, "loss": 2.9602, "step": 3154000 }, { "epoch": 19.81, "learning_rate": 5.006530120179327e-07, "loss": 2.951, "step": 3154500 }, { "epoch": 19.81, "learning_rate": 4.928043098793184e-07, "loss": 2.9622, "step": 3155000 }, { "epoch": 19.81, "learning_rate": 4.849713051449812e-07, "loss": 2.9568, "step": 3155500 }, { "epoch": 19.82, "learning_rate": 4.771226030063668e-07, "loss": 2.9547, "step": 3156000 }, { "epoch": 19.82, "learning_rate": 4.6927390086775257e-07, "loss": 2.9592, "step": 3156500 }, { "epoch": 19.82, "learning_rate": 4.6142519872913816e-07, "loss": 2.9466, "step": 3157000 }, { "epoch": 19.83, "learning_rate": 4.53592193994801e-07, "loss": 2.9506, "step": 3157500 }, { "epoch": 19.83, "learning_rate": 4.457434918561867e-07, "loss": 2.9576, "step": 3158000 }, { "epoch": 19.83, "learning_rate": 4.378947897175723e-07, "loss": 2.9554, "step": 3158500 }, { "epoch": 19.84, "learning_rate": 4.300617849832352e-07, "loss": 2.9554, "step": 3159000 }, { "epoch": 19.84, "learning_rate": 4.222130828446209e-07, "loss": 2.9604, "step": 3159500 }, { "epoch": 19.84, "learning_rate": 4.143643807060065e-07, "loss": 2.9551, "step": 3160000 }, { "epoch": 19.84, "learning_rate": 4.065156785673921e-07, "loss": 2.9568, "step": 3160500 }, { "epoch": 19.85, "learning_rate": 3.986669764287777e-07, "loss": 2.9522, "step": 3161000 }, { "epoch": 19.85, "learning_rate": 3.908339716944406e-07, "loss": 2.9513, "step": 3161500 }, { "epoch": 19.85, "learning_rate": 3.8298526955582626e-07, "loss": 2.9586, "step": 3162000 }, { "epoch": 19.86, "learning_rate": 3.751365674172119e-07, "loss": 2.9557, "step": 3162500 }, { "epoch": 19.86, "learning_rate": 3.6728786527859754e-07, "loss": 2.9601, "step": 3163000 }, { "epoch": 19.86, "learning_rate": 3.594391631399832e-07, "loss": 2.9652, "step": 3163500 }, { "epoch": 19.87, "learning_rate": 3.515904610013688e-07, "loss": 2.956, "step": 3164000 }, { "epoch": 19.87, "learning_rate": 3.4374175886275447e-07, "loss": 2.959, "step": 3164500 }, { "epoch": 19.87, "learning_rate": 3.359087541284173e-07, "loss": 2.9635, "step": 3165000 }, { "epoch": 19.88, "learning_rate": 3.28060051989803e-07, "loss": 2.9559, "step": 3165500 }, { "epoch": 19.88, "learning_rate": 3.2021134985118865e-07, "loss": 2.9562, "step": 3166000 }, { "epoch": 19.88, "learning_rate": 3.123940425211287e-07, "loss": 2.9595, "step": 3166500 }, { "epoch": 19.89, "learning_rate": 3.0454534038251436e-07, "loss": 2.9567, "step": 3167000 }, { "epoch": 19.89, "learning_rate": 2.966966382439e-07, "loss": 2.9604, "step": 3167500 }, { "epoch": 19.89, "learning_rate": 2.8884793610528564e-07, "loss": 2.9612, "step": 3168000 }, { "epoch": 19.89, "learning_rate": 2.8101493137094854e-07, "loss": 2.9592, "step": 3168500 }, { "epoch": 19.9, "learning_rate": 2.731662292323342e-07, "loss": 2.9564, "step": 3169000 }, { "epoch": 19.9, "learning_rate": 2.653175270937198e-07, "loss": 2.9531, "step": 3169500 }, { "epoch": 19.9, "learning_rate": 2.574688249551054e-07, "loss": 2.9508, "step": 3170000 }, { "epoch": 19.91, "learning_rate": 2.4962012281649106e-07, "loss": 2.9616, "step": 3170500 }, { "epoch": 19.91, "learning_rate": 2.417714206778767e-07, "loss": 2.9554, "step": 3171000 }, { "epoch": 19.91, "learning_rate": 2.3392271853926234e-07, "loss": 2.9519, "step": 3171500 }, { "epoch": 19.92, "learning_rate": 2.26074016400648e-07, "loss": 2.9532, "step": 3172000 }, { "epoch": 19.92, "learning_rate": 2.1822531426203365e-07, "loss": 2.9569, "step": 3172500 }, { "epoch": 19.92, "learning_rate": 2.103766121234193e-07, "loss": 2.9653, "step": 3173000 }, { "epoch": 19.93, "learning_rate": 2.0252790998480493e-07, "loss": 2.9583, "step": 3173500 }, { "epoch": 19.93, "learning_rate": 1.9467920784619055e-07, "loss": 2.9598, "step": 3174000 }, { "epoch": 19.93, "learning_rate": 1.868305057075762e-07, "loss": 2.9586, "step": 3174500 }, { "epoch": 19.94, "learning_rate": 1.789975009732391e-07, "loss": 2.954, "step": 3175000 }, { "epoch": 19.94, "learning_rate": 1.711487988346247e-07, "loss": 2.953, "step": 3175500 }, { "epoch": 19.94, "learning_rate": 1.6330009669601035e-07, "loss": 2.9525, "step": 3176000 }, { "epoch": 19.95, "learning_rate": 1.5546709196167323e-07, "loss": 2.9541, "step": 3176500 }, { "epoch": 19.95, "learning_rate": 1.4761838982305884e-07, "loss": 2.9535, "step": 3177000 }, { "epoch": 19.95, "learning_rate": 1.397696876844445e-07, "loss": 2.9581, "step": 3177500 }, { "epoch": 19.95, "learning_rate": 1.3192098554583015e-07, "loss": 2.9551, "step": 3178000 }, { "epoch": 19.96, "learning_rate": 1.240722834072158e-07, "loss": 2.9521, "step": 3178500 }, { "epoch": 19.96, "learning_rate": 1.1622358126860142e-07, "loss": 2.9584, "step": 3179000 }, { "epoch": 19.96, "learning_rate": 1.0837487912998708e-07, "loss": 2.9526, "step": 3179500 }, { "epoch": 19.97, "learning_rate": 1.0052617699137271e-07, "loss": 2.9562, "step": 3180000 }, { "epoch": 19.97, "eval_accuracy": 0.4637797431547294, "eval_loss": 2.890829086303711, "eval_runtime": 1447.4458, "eval_samples_per_second": 57.896, "eval_steps_per_second": 5.79, "step": 3180000 }, { "epoch": 19.97, "learning_rate": 9.267747485275835e-08, "loss": 2.9584, "step": 3180500 }, { "epoch": 19.97, "learning_rate": 8.482877271414399e-08, "loss": 2.9562, "step": 3181000 }, { "epoch": 19.98, "learning_rate": 7.698007057552963e-08, "loss": 2.9607, "step": 3181500 }, { "epoch": 19.98, "learning_rate": 6.916276324546973e-08, "loss": 2.9621, "step": 3182000 }, { "epoch": 19.98, "learning_rate": 6.131406110685538e-08, "loss": 2.957, "step": 3182500 }, { "epoch": 19.99, "learning_rate": 5.346535896824102e-08, "loss": 2.9597, "step": 3183000 }, { "epoch": 19.99, "learning_rate": 4.561665682962665e-08, "loss": 2.9602, "step": 3183500 }, { "epoch": 19.99, "learning_rate": 3.7767954691012294e-08, "loss": 2.9545, "step": 3184000 }, { "epoch": 20.0, "learning_rate": 2.9919252552397935e-08, "loss": 2.9615, "step": 3184500 }, { "epoch": 20.0, "learning_rate": 2.2070550413783577e-08, "loss": 2.9569, "step": 3185000 } ], "max_steps": 3185240, "num_train_epochs": 20, "total_flos": 1.664548347838464e+19, "trial_name": null, "trial_params": null }