diff --git "a/checkpoint-680000/trainer_state.json" "b/checkpoint-680000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-680000/trainer_state.json" @@ -0,0 +1,8176 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 196.9872537659328, + "global_step": 680000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.14, + "learning_rate": 4.996378910776362e-05, + "loss": 8.4547, + "step": 500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9927578215527235e-05, + "loss": 7.8499, + "step": 1000 + }, + { + "epoch": 0.43, + "learning_rate": 4.989136732329085e-05, + "loss": 7.554, + "step": 1500 + }, + { + "epoch": 0.58, + "learning_rate": 4.985515643105446e-05, + "loss": 7.4955, + "step": 2000 + }, + { + "epoch": 0.72, + "learning_rate": 4.9818945538818076e-05, + "loss": 7.2532, + "step": 2500 + }, + { + "epoch": 0.87, + "learning_rate": 4.97827346465817e-05, + "loss": 7.2584, + "step": 3000 + }, + { + "epoch": 1.01, + "learning_rate": 4.974652375434531e-05, + "loss": 7.175, + "step": 3500 + }, + { + "epoch": 1.16, + "learning_rate": 4.9710312862108924e-05, + "loss": 6.9372, + "step": 4000 + }, + { + "epoch": 1.3, + "learning_rate": 4.967410196987254e-05, + "loss": 6.9367, + "step": 4500 + }, + { + "epoch": 1.45, + "learning_rate": 4.9637891077636157e-05, + "loss": 6.7982, + "step": 5000 + }, + { + "epoch": 1.59, + "learning_rate": 4.9601680185399766e-05, + "loss": 6.9367, + "step": 5500 + }, + { + "epoch": 1.74, + "learning_rate": 4.956546929316339e-05, + "loss": 6.7646, + "step": 6000 + }, + { + "epoch": 1.88, + "learning_rate": 4.9529258400927005e-05, + "loss": 6.8283, + "step": 6500 + }, + { + "epoch": 2.03, + "learning_rate": 4.9493047508690614e-05, + "loss": 6.6735, + "step": 7000 + }, + { + "epoch": 2.17, + "learning_rate": 4.945683661645423e-05, + "loss": 6.5376, + "step": 7500 + }, + { + "epoch": 2.32, + "learning_rate": 4.9420625724217846e-05, + "loss": 6.4529, + "step": 8000 + }, + { + "epoch": 2.46, + "learning_rate": 4.938441483198146e-05, + "loss": 6.5368, + "step": 8500 + }, + { + "epoch": 2.61, + "learning_rate": 4.934820393974508e-05, + "loss": 6.4591, + "step": 9000 + }, + { + "epoch": 2.75, + "learning_rate": 4.9311993047508695e-05, + "loss": 6.313, + "step": 9500 + }, + { + "epoch": 2.9, + "learning_rate": 4.927578215527231e-05, + "loss": 6.4253, + "step": 10000 + }, + { + "epoch": 3.04, + "learning_rate": 4.923957126303592e-05, + "loss": 6.2994, + "step": 10500 + }, + { + "epoch": 3.19, + "learning_rate": 4.9203360370799536e-05, + "loss": 6.1315, + "step": 11000 + }, + { + "epoch": 3.33, + "learning_rate": 4.916714947856315e-05, + "loss": 6.2794, + "step": 11500 + }, + { + "epoch": 3.48, + "learning_rate": 4.913093858632677e-05, + "loss": 6.2569, + "step": 12000 + }, + { + "epoch": 3.62, + "learning_rate": 4.9094727694090384e-05, + "loss": 6.1676, + "step": 12500 + }, + { + "epoch": 3.77, + "learning_rate": 4.9058516801854e-05, + "loss": 6.0854, + "step": 13000 + }, + { + "epoch": 3.91, + "learning_rate": 4.902230590961762e-05, + "loss": 6.1253, + "step": 13500 + }, + { + "epoch": 4.06, + "learning_rate": 4.8986095017381226e-05, + "loss": 6.1414, + "step": 14000 + }, + { + "epoch": 4.2, + "learning_rate": 4.894988412514485e-05, + "loss": 6.0679, + "step": 14500 + }, + { + "epoch": 4.35, + "learning_rate": 4.8913673232908465e-05, + "loss": 6.0058, + "step": 15000 + }, + { + "epoch": 4.49, + "learning_rate": 4.8877462340672074e-05, + "loss": 6.0498, + "step": 15500 + }, + { + "epoch": 4.63, + "learning_rate": 4.884125144843569e-05, + "loss": 5.9609, + "step": 16000 + }, + { + "epoch": 4.78, + "learning_rate": 4.8805040556199306e-05, + "loss": 5.9032, + "step": 16500 + }, + { + "epoch": 4.92, + "learning_rate": 4.876882966396292e-05, + "loss": 5.7145, + "step": 17000 + }, + { + "epoch": 5.07, + "learning_rate": 4.873261877172654e-05, + "loss": 5.8655, + "step": 17500 + }, + { + "epoch": 5.21, + "learning_rate": 4.8696407879490155e-05, + "loss": 5.7881, + "step": 18000 + }, + { + "epoch": 5.36, + "learning_rate": 4.866019698725377e-05, + "loss": 5.7224, + "step": 18500 + }, + { + "epoch": 5.5, + "learning_rate": 4.862398609501738e-05, + "loss": 5.8037, + "step": 19000 + }, + { + "epoch": 5.65, + "learning_rate": 4.8587775202780996e-05, + "loss": 5.7771, + "step": 19500 + }, + { + "epoch": 5.79, + "learning_rate": 4.855156431054461e-05, + "loss": 5.6896, + "step": 20000 + }, + { + "epoch": 5.94, + "learning_rate": 4.851535341830823e-05, + "loss": 5.6381, + "step": 20500 + }, + { + "epoch": 6.08, + "learning_rate": 4.8479142526071845e-05, + "loss": 5.7065, + "step": 21000 + }, + { + "epoch": 6.23, + "learning_rate": 4.844293163383546e-05, + "loss": 5.6466, + "step": 21500 + }, + { + "epoch": 6.37, + "learning_rate": 4.840672074159908e-05, + "loss": 5.5202, + "step": 22000 + }, + { + "epoch": 6.52, + "learning_rate": 4.8370509849362686e-05, + "loss": 5.553, + "step": 22500 + }, + { + "epoch": 6.66, + "learning_rate": 4.833429895712631e-05, + "loss": 5.523, + "step": 23000 + }, + { + "epoch": 6.81, + "learning_rate": 4.8298088064889925e-05, + "loss": 5.4201, + "step": 23500 + }, + { + "epoch": 6.95, + "learning_rate": 4.8261877172653534e-05, + "loss": 5.5552, + "step": 24000 + }, + { + "epoch": 7.1, + "learning_rate": 4.822566628041715e-05, + "loss": 5.361, + "step": 24500 + }, + { + "epoch": 7.24, + "learning_rate": 4.8189455388180767e-05, + "loss": 5.4092, + "step": 25000 + }, + { + "epoch": 7.39, + "learning_rate": 4.815324449594438e-05, + "loss": 5.3235, + "step": 25500 + }, + { + "epoch": 7.53, + "learning_rate": 4.811703360370799e-05, + "loss": 5.4796, + "step": 26000 + }, + { + "epoch": 7.68, + "learning_rate": 4.8080822711471615e-05, + "loss": 5.3789, + "step": 26500 + }, + { + "epoch": 7.82, + "learning_rate": 4.804461181923523e-05, + "loss": 5.2428, + "step": 27000 + }, + { + "epoch": 7.97, + "learning_rate": 4.800840092699884e-05, + "loss": 5.4215, + "step": 27500 + }, + { + "epoch": 8.11, + "learning_rate": 4.797219003476246e-05, + "loss": 5.2112, + "step": 28000 + }, + { + "epoch": 8.26, + "learning_rate": 4.793597914252607e-05, + "loss": 5.4211, + "step": 28500 + }, + { + "epoch": 8.4, + "learning_rate": 4.789976825028969e-05, + "loss": 5.2253, + "step": 29000 + }, + { + "epoch": 8.55, + "learning_rate": 4.7863557358053305e-05, + "loss": 5.1749, + "step": 29500 + }, + { + "epoch": 8.69, + "learning_rate": 4.782734646581692e-05, + "loss": 5.121, + "step": 30000 + }, + { + "epoch": 8.84, + "learning_rate": 4.779113557358054e-05, + "loss": 5.1387, + "step": 30500 + }, + { + "epoch": 8.98, + "learning_rate": 4.7754924681344146e-05, + "loss": 5.198, + "step": 31000 + }, + { + "epoch": 9.13, + "learning_rate": 4.771871378910777e-05, + "loss": 4.9978, + "step": 31500 + }, + { + "epoch": 9.27, + "learning_rate": 4.7682502896871385e-05, + "loss": 4.9791, + "step": 32000 + }, + { + "epoch": 9.41, + "learning_rate": 4.7646292004634994e-05, + "loss": 4.9845, + "step": 32500 + }, + { + "epoch": 9.56, + "learning_rate": 4.761008111239861e-05, + "loss": 4.8999, + "step": 33000 + }, + { + "epoch": 9.7, + "learning_rate": 4.7573870220162227e-05, + "loss": 4.9872, + "step": 33500 + }, + { + "epoch": 9.85, + "learning_rate": 4.753765932792584e-05, + "loss": 5.0741, + "step": 34000 + }, + { + "epoch": 9.99, + "learning_rate": 4.750144843568945e-05, + "loss": 5.0205, + "step": 34500 + }, + { + "epoch": 10.14, + "learning_rate": 4.7465237543453075e-05, + "loss": 4.7512, + "step": 35000 + }, + { + "epoch": 10.28, + "learning_rate": 4.742902665121669e-05, + "loss": 4.9417, + "step": 35500 + }, + { + "epoch": 10.43, + "learning_rate": 4.73928157589803e-05, + "loss": 4.9746, + "step": 36000 + }, + { + "epoch": 10.57, + "learning_rate": 4.735660486674392e-05, + "loss": 4.747, + "step": 36500 + }, + { + "epoch": 10.72, + "learning_rate": 4.732039397450753e-05, + "loss": 4.6858, + "step": 37000 + }, + { + "epoch": 10.86, + "learning_rate": 4.728418308227115e-05, + "loss": 4.8208, + "step": 37500 + }, + { + "epoch": 11.01, + "learning_rate": 4.7247972190034765e-05, + "loss": 4.7271, + "step": 38000 + }, + { + "epoch": 11.15, + "learning_rate": 4.721176129779838e-05, + "loss": 4.6115, + "step": 38500 + }, + { + "epoch": 11.3, + "learning_rate": 4.7175550405562e-05, + "loss": 4.7304, + "step": 39000 + }, + { + "epoch": 11.44, + "learning_rate": 4.7139339513325606e-05, + "loss": 4.4773, + "step": 39500 + }, + { + "epoch": 11.59, + "learning_rate": 4.710312862108923e-05, + "loss": 4.6107, + "step": 40000 + }, + { + "epoch": 11.73, + "learning_rate": 4.706691772885284e-05, + "loss": 4.5442, + "step": 40500 + }, + { + "epoch": 11.88, + "learning_rate": 4.7030706836616454e-05, + "loss": 4.744, + "step": 41000 + }, + { + "epoch": 12.02, + "learning_rate": 4.699449594438007e-05, + "loss": 4.6054, + "step": 41500 + }, + { + "epoch": 12.17, + "learning_rate": 4.695828505214369e-05, + "loss": 4.6134, + "step": 42000 + }, + { + "epoch": 12.31, + "learning_rate": 4.69220741599073e-05, + "loss": 4.578, + "step": 42500 + }, + { + "epoch": 12.46, + "learning_rate": 4.688586326767091e-05, + "loss": 4.4292, + "step": 43000 + }, + { + "epoch": 12.6, + "learning_rate": 4.6849652375434535e-05, + "loss": 4.364, + "step": 43500 + }, + { + "epoch": 12.75, + "learning_rate": 4.681344148319815e-05, + "loss": 4.3788, + "step": 44000 + }, + { + "epoch": 12.89, + "learning_rate": 4.677723059096176e-05, + "loss": 4.4367, + "step": 44500 + }, + { + "epoch": 13.04, + "learning_rate": 4.674101969872538e-05, + "loss": 4.3541, + "step": 45000 + }, + { + "epoch": 13.18, + "learning_rate": 4.670480880648899e-05, + "loss": 4.2888, + "step": 45500 + }, + { + "epoch": 13.33, + "learning_rate": 4.666859791425261e-05, + "loss": 4.387, + "step": 46000 + }, + { + "epoch": 13.47, + "learning_rate": 4.6632387022016225e-05, + "loss": 4.3504, + "step": 46500 + }, + { + "epoch": 13.62, + "learning_rate": 4.659617612977984e-05, + "loss": 4.2987, + "step": 47000 + }, + { + "epoch": 13.76, + "learning_rate": 4.655996523754346e-05, + "loss": 4.3652, + "step": 47500 + }, + { + "epoch": 13.9, + "learning_rate": 4.6523754345307066e-05, + "loss": 4.4629, + "step": 48000 + }, + { + "epoch": 14.05, + "learning_rate": 4.648754345307069e-05, + "loss": 4.2083, + "step": 48500 + }, + { + "epoch": 14.19, + "learning_rate": 4.64513325608343e-05, + "loss": 4.2904, + "step": 49000 + }, + { + "epoch": 14.34, + "learning_rate": 4.6415121668597915e-05, + "loss": 4.2137, + "step": 49500 + }, + { + "epoch": 14.48, + "learning_rate": 4.637891077636153e-05, + "loss": 4.2921, + "step": 50000 + }, + { + "epoch": 14.63, + "learning_rate": 4.634269988412515e-05, + "loss": 4.2011, + "step": 50500 + }, + { + "epoch": 14.77, + "learning_rate": 4.630648899188876e-05, + "loss": 4.1971, + "step": 51000 + }, + { + "epoch": 14.92, + "learning_rate": 4.627027809965237e-05, + "loss": 4.2372, + "step": 51500 + }, + { + "epoch": 15.06, + "learning_rate": 4.6234067207415995e-05, + "loss": 4.1604, + "step": 52000 + }, + { + "epoch": 15.21, + "learning_rate": 4.619785631517961e-05, + "loss": 4.0747, + "step": 52500 + }, + { + "epoch": 15.35, + "learning_rate": 4.616164542294322e-05, + "loss": 4.1311, + "step": 53000 + }, + { + "epoch": 15.5, + "learning_rate": 4.612543453070684e-05, + "loss": 4.1403, + "step": 53500 + }, + { + "epoch": 15.64, + "learning_rate": 4.608922363847045e-05, + "loss": 4.2004, + "step": 54000 + }, + { + "epoch": 15.79, + "learning_rate": 4.605301274623407e-05, + "loss": 4.1078, + "step": 54500 + }, + { + "epoch": 15.93, + "learning_rate": 4.6016801853997685e-05, + "loss": 4.1072, + "step": 55000 + }, + { + "epoch": 16.08, + "learning_rate": 4.59805909617613e-05, + "loss": 4.1097, + "step": 55500 + }, + { + "epoch": 16.22, + "learning_rate": 4.594438006952492e-05, + "loss": 3.901, + "step": 56000 + }, + { + "epoch": 16.37, + "learning_rate": 4.5908169177288526e-05, + "loss": 4.0599, + "step": 56500 + }, + { + "epoch": 16.51, + "learning_rate": 4.587195828505215e-05, + "loss": 4.0458, + "step": 57000 + }, + { + "epoch": 16.66, + "learning_rate": 4.583574739281576e-05, + "loss": 4.0344, + "step": 57500 + }, + { + "epoch": 16.8, + "learning_rate": 4.5799536500579375e-05, + "loss": 3.9669, + "step": 58000 + }, + { + "epoch": 16.95, + "learning_rate": 4.576332560834299e-05, + "loss": 4.0911, + "step": 58500 + }, + { + "epoch": 17.09, + "learning_rate": 4.572711471610661e-05, + "loss": 4.0642, + "step": 59000 + }, + { + "epoch": 17.24, + "learning_rate": 4.569090382387022e-05, + "loss": 3.9691, + "step": 59500 + }, + { + "epoch": 17.38, + "learning_rate": 4.565469293163384e-05, + "loss": 3.8211, + "step": 60000 + }, + { + "epoch": 17.53, + "learning_rate": 4.5618482039397455e-05, + "loss": 3.8802, + "step": 60500 + }, + { + "epoch": 17.67, + "learning_rate": 4.558227114716107e-05, + "loss": 3.9179, + "step": 61000 + }, + { + "epoch": 17.82, + "learning_rate": 4.554606025492468e-05, + "loss": 3.9943, + "step": 61500 + }, + { + "epoch": 17.96, + "learning_rate": 4.55098493626883e-05, + "loss": 3.9248, + "step": 62000 + }, + { + "epoch": 18.11, + "learning_rate": 4.547363847045191e-05, + "loss": 3.8573, + "step": 62500 + }, + { + "epoch": 18.25, + "learning_rate": 4.543742757821553e-05, + "loss": 3.8063, + "step": 63000 + }, + { + "epoch": 18.4, + "learning_rate": 4.5401216685979145e-05, + "loss": 3.7963, + "step": 63500 + }, + { + "epoch": 18.54, + "learning_rate": 4.536500579374276e-05, + "loss": 3.923, + "step": 64000 + }, + { + "epoch": 18.68, + "learning_rate": 4.532879490150638e-05, + "loss": 3.817, + "step": 64500 + }, + { + "epoch": 18.83, + "learning_rate": 4.5292584009269986e-05, + "loss": 3.8602, + "step": 65000 + }, + { + "epoch": 18.97, + "learning_rate": 4.525637311703361e-05, + "loss": 3.6989, + "step": 65500 + }, + { + "epoch": 19.12, + "learning_rate": 4.522016222479722e-05, + "loss": 3.7462, + "step": 66000 + }, + { + "epoch": 19.26, + "learning_rate": 4.5183951332560835e-05, + "loss": 3.7723, + "step": 66500 + }, + { + "epoch": 19.41, + "learning_rate": 4.514774044032445e-05, + "loss": 3.8287, + "step": 67000 + }, + { + "epoch": 19.55, + "learning_rate": 4.511152954808807e-05, + "loss": 3.7813, + "step": 67500 + }, + { + "epoch": 19.7, + "learning_rate": 4.507531865585168e-05, + "loss": 3.893, + "step": 68000 + }, + { + "epoch": 19.84, + "learning_rate": 4.50391077636153e-05, + "loss": 3.8048, + "step": 68500 + }, + { + "epoch": 19.99, + "learning_rate": 4.5002896871378915e-05, + "loss": 3.7366, + "step": 69000 + }, + { + "epoch": 20.13, + "learning_rate": 4.4966685979142524e-05, + "loss": 3.6066, + "step": 69500 + }, + { + "epoch": 20.28, + "learning_rate": 4.493047508690614e-05, + "loss": 3.6848, + "step": 70000 + }, + { + "epoch": 20.42, + "learning_rate": 4.4894264194669763e-05, + "loss": 3.7469, + "step": 70500 + }, + { + "epoch": 20.57, + "learning_rate": 4.485805330243337e-05, + "loss": 3.7087, + "step": 71000 + }, + { + "epoch": 20.71, + "learning_rate": 4.482184241019699e-05, + "loss": 3.7463, + "step": 71500 + }, + { + "epoch": 20.86, + "learning_rate": 4.4785631517960605e-05, + "loss": 3.6861, + "step": 72000 + }, + { + "epoch": 21.0, + "learning_rate": 4.474942062572422e-05, + "loss": 3.7919, + "step": 72500 + }, + { + "epoch": 21.15, + "learning_rate": 4.471320973348784e-05, + "loss": 3.6995, + "step": 73000 + }, + { + "epoch": 21.29, + "learning_rate": 4.4676998841251446e-05, + "loss": 3.5774, + "step": 73500 + }, + { + "epoch": 21.44, + "learning_rate": 4.464078794901507e-05, + "loss": 3.6384, + "step": 74000 + }, + { + "epoch": 21.58, + "learning_rate": 4.460457705677868e-05, + "loss": 3.7398, + "step": 74500 + }, + { + "epoch": 21.73, + "learning_rate": 4.4568366164542295e-05, + "loss": 3.6163, + "step": 75000 + }, + { + "epoch": 21.87, + "learning_rate": 4.453215527230592e-05, + "loss": 3.6542, + "step": 75500 + }, + { + "epoch": 22.02, + "learning_rate": 4.449594438006953e-05, + "loss": 3.5554, + "step": 76000 + }, + { + "epoch": 22.16, + "learning_rate": 4.445973348783314e-05, + "loss": 3.4238, + "step": 76500 + }, + { + "epoch": 22.31, + "learning_rate": 4.442352259559676e-05, + "loss": 3.5048, + "step": 77000 + }, + { + "epoch": 22.45, + "learning_rate": 4.4387311703360375e-05, + "loss": 3.3922, + "step": 77500 + }, + { + "epoch": 22.6, + "learning_rate": 4.4351100811123985e-05, + "loss": 3.5757, + "step": 78000 + }, + { + "epoch": 22.74, + "learning_rate": 4.43148899188876e-05, + "loss": 3.6146, + "step": 78500 + }, + { + "epoch": 22.89, + "learning_rate": 4.4278679026651224e-05, + "loss": 3.4844, + "step": 79000 + }, + { + "epoch": 23.03, + "learning_rate": 4.424246813441483e-05, + "loss": 3.5872, + "step": 79500 + }, + { + "epoch": 23.17, + "learning_rate": 4.420625724217845e-05, + "loss": 3.336, + "step": 80000 + }, + { + "epoch": 23.32, + "learning_rate": 4.4170046349942065e-05, + "loss": 3.4509, + "step": 80500 + }, + { + "epoch": 23.46, + "learning_rate": 4.413383545770568e-05, + "loss": 3.5564, + "step": 81000 + }, + { + "epoch": 23.61, + "learning_rate": 4.40976245654693e-05, + "loss": 3.4449, + "step": 81500 + }, + { + "epoch": 23.75, + "learning_rate": 4.4061413673232907e-05, + "loss": 3.4913, + "step": 82000 + }, + { + "epoch": 23.9, + "learning_rate": 4.402520278099653e-05, + "loss": 3.6125, + "step": 82500 + }, + { + "epoch": 24.04, + "learning_rate": 4.398899188876014e-05, + "loss": 3.3634, + "step": 83000 + }, + { + "epoch": 24.19, + "learning_rate": 4.3952780996523755e-05, + "loss": 3.5246, + "step": 83500 + }, + { + "epoch": 24.33, + "learning_rate": 4.391657010428738e-05, + "loss": 3.3986, + "step": 84000 + }, + { + "epoch": 24.48, + "learning_rate": 4.388035921205099e-05, + "loss": 3.4707, + "step": 84500 + }, + { + "epoch": 24.62, + "learning_rate": 4.38441483198146e-05, + "loss": 3.4469, + "step": 85000 + }, + { + "epoch": 24.77, + "learning_rate": 4.380793742757822e-05, + "loss": 3.3893, + "step": 85500 + }, + { + "epoch": 24.91, + "learning_rate": 4.3771726535341835e-05, + "loss": 3.3876, + "step": 86000 + }, + { + "epoch": 25.06, + "learning_rate": 4.3735515643105445e-05, + "loss": 3.3913, + "step": 86500 + }, + { + "epoch": 25.2, + "learning_rate": 4.369930475086906e-05, + "loss": 3.472, + "step": 87000 + }, + { + "epoch": 25.35, + "learning_rate": 4.3663093858632684e-05, + "loss": 3.2559, + "step": 87500 + }, + { + "epoch": 25.49, + "learning_rate": 4.362688296639629e-05, + "loss": 3.461, + "step": 88000 + }, + { + "epoch": 25.64, + "learning_rate": 4.359067207415991e-05, + "loss": 3.3586, + "step": 88500 + }, + { + "epoch": 25.78, + "learning_rate": 4.3554461181923525e-05, + "loss": 3.2862, + "step": 89000 + }, + { + "epoch": 25.93, + "learning_rate": 4.351825028968714e-05, + "loss": 3.2737, + "step": 89500 + }, + { + "epoch": 26.07, + "learning_rate": 4.348203939745076e-05, + "loss": 3.2978, + "step": 90000 + }, + { + "epoch": 26.22, + "learning_rate": 4.344582850521437e-05, + "loss": 3.2151, + "step": 90500 + }, + { + "epoch": 26.36, + "learning_rate": 4.340961761297799e-05, + "loss": 3.339, + "step": 91000 + }, + { + "epoch": 26.51, + "learning_rate": 4.33734067207416e-05, + "loss": 3.2553, + "step": 91500 + }, + { + "epoch": 26.65, + "learning_rate": 4.3337195828505215e-05, + "loss": 3.1708, + "step": 92000 + }, + { + "epoch": 26.8, + "learning_rate": 4.330098493626883e-05, + "loss": 3.2419, + "step": 92500 + }, + { + "epoch": 26.94, + "learning_rate": 4.326477404403245e-05, + "loss": 3.3119, + "step": 93000 + }, + { + "epoch": 27.09, + "learning_rate": 4.322856315179606e-05, + "loss": 3.1761, + "step": 93500 + }, + { + "epoch": 27.23, + "learning_rate": 4.319235225955968e-05, + "loss": 3.0938, + "step": 94000 + }, + { + "epoch": 27.38, + "learning_rate": 4.3156141367323295e-05, + "loss": 3.2, + "step": 94500 + }, + { + "epoch": 27.52, + "learning_rate": 4.3119930475086905e-05, + "loss": 3.2952, + "step": 95000 + }, + { + "epoch": 27.67, + "learning_rate": 4.308371958285052e-05, + "loss": 3.2587, + "step": 95500 + }, + { + "epoch": 27.81, + "learning_rate": 4.3047508690614144e-05, + "loss": 3.1839, + "step": 96000 + }, + { + "epoch": 27.95, + "learning_rate": 4.301129779837775e-05, + "loss": 3.2991, + "step": 96500 + }, + { + "epoch": 28.1, + "learning_rate": 4.297508690614137e-05, + "loss": 3.2873, + "step": 97000 + }, + { + "epoch": 28.24, + "learning_rate": 4.2938876013904985e-05, + "loss": 3.2506, + "step": 97500 + }, + { + "epoch": 28.39, + "learning_rate": 4.29026651216686e-05, + "loss": 3.0849, + "step": 98000 + }, + { + "epoch": 28.53, + "learning_rate": 4.286645422943222e-05, + "loss": 3.1321, + "step": 98500 + }, + { + "epoch": 28.68, + "learning_rate": 4.283024333719583e-05, + "loss": 3.0935, + "step": 99000 + }, + { + "epoch": 28.82, + "learning_rate": 4.279403244495945e-05, + "loss": 3.1311, + "step": 99500 + }, + { + "epoch": 28.97, + "learning_rate": 4.275782155272306e-05, + "loss": 3.0079, + "step": 100000 + }, + { + "epoch": 29.11, + "learning_rate": 4.2721610660486675e-05, + "loss": 3.0359, + "step": 100500 + }, + { + "epoch": 29.26, + "learning_rate": 4.268539976825029e-05, + "loss": 3.1054, + "step": 101000 + }, + { + "epoch": 29.4, + "learning_rate": 4.264918887601391e-05, + "loss": 2.9851, + "step": 101500 + }, + { + "epoch": 29.55, + "learning_rate": 4.261297798377752e-05, + "loss": 3.0301, + "step": 102000 + }, + { + "epoch": 29.69, + "learning_rate": 4.257676709154114e-05, + "loss": 3.0815, + "step": 102500 + }, + { + "epoch": 29.84, + "learning_rate": 4.2540556199304755e-05, + "loss": 3.0711, + "step": 103000 + }, + { + "epoch": 29.98, + "learning_rate": 4.2504345307068365e-05, + "loss": 3.2656, + "step": 103500 + }, + { + "epoch": 30.13, + "learning_rate": 4.246813441483198e-05, + "loss": 2.9655, + "step": 104000 + }, + { + "epoch": 30.27, + "learning_rate": 4.2431923522595604e-05, + "loss": 2.9877, + "step": 104500 + }, + { + "epoch": 30.42, + "learning_rate": 4.239571263035921e-05, + "loss": 3.1679, + "step": 105000 + }, + { + "epoch": 30.56, + "learning_rate": 4.235950173812283e-05, + "loss": 2.9898, + "step": 105500 + }, + { + "epoch": 30.71, + "learning_rate": 4.2323290845886445e-05, + "loss": 3.0315, + "step": 106000 + }, + { + "epoch": 30.85, + "learning_rate": 4.228707995365006e-05, + "loss": 2.9994, + "step": 106500 + }, + { + "epoch": 31.0, + "learning_rate": 4.225086906141367e-05, + "loss": 3.0117, + "step": 107000 + }, + { + "epoch": 31.14, + "learning_rate": 4.2214658169177294e-05, + "loss": 2.9706, + "step": 107500 + }, + { + "epoch": 31.29, + "learning_rate": 4.217844727694091e-05, + "loss": 2.8996, + "step": 108000 + }, + { + "epoch": 31.43, + "learning_rate": 4.214223638470452e-05, + "loss": 2.8631, + "step": 108500 + }, + { + "epoch": 31.58, + "learning_rate": 4.2106025492468135e-05, + "loss": 3.0409, + "step": 109000 + }, + { + "epoch": 31.72, + "learning_rate": 4.206981460023175e-05, + "loss": 2.9536, + "step": 109500 + }, + { + "epoch": 31.87, + "learning_rate": 4.203360370799537e-05, + "loss": 2.9889, + "step": 110000 + }, + { + "epoch": 32.01, + "learning_rate": 4.199739281575898e-05, + "loss": 2.9418, + "step": 110500 + }, + { + "epoch": 32.16, + "learning_rate": 4.19611819235226e-05, + "loss": 2.9762, + "step": 111000 + }, + { + "epoch": 32.3, + "learning_rate": 4.1924971031286216e-05, + "loss": 2.8923, + "step": 111500 + }, + { + "epoch": 32.44, + "learning_rate": 4.1888760139049825e-05, + "loss": 2.8679, + "step": 112000 + }, + { + "epoch": 32.59, + "learning_rate": 4.185254924681344e-05, + "loss": 3.0042, + "step": 112500 + }, + { + "epoch": 32.73, + "learning_rate": 4.1816338354577064e-05, + "loss": 2.7931, + "step": 113000 + }, + { + "epoch": 32.88, + "learning_rate": 4.178012746234067e-05, + "loss": 3.0075, + "step": 113500 + }, + { + "epoch": 33.02, + "learning_rate": 4.174391657010429e-05, + "loss": 2.9121, + "step": 114000 + }, + { + "epoch": 33.17, + "learning_rate": 4.1707705677867905e-05, + "loss": 2.8189, + "step": 114500 + }, + { + "epoch": 33.31, + "learning_rate": 4.167149478563152e-05, + "loss": 2.7948, + "step": 115000 + }, + { + "epoch": 33.46, + "learning_rate": 4.163528389339513e-05, + "loss": 2.8265, + "step": 115500 + }, + { + "epoch": 33.6, + "learning_rate": 4.1599073001158754e-05, + "loss": 2.934, + "step": 116000 + }, + { + "epoch": 33.75, + "learning_rate": 4.156286210892237e-05, + "loss": 2.8438, + "step": 116500 + }, + { + "epoch": 33.89, + "learning_rate": 4.152665121668598e-05, + "loss": 2.9726, + "step": 117000 + }, + { + "epoch": 34.04, + "learning_rate": 4.1490440324449595e-05, + "loss": 2.7706, + "step": 117500 + }, + { + "epoch": 34.18, + "learning_rate": 4.145422943221321e-05, + "loss": 2.7422, + "step": 118000 + }, + { + "epoch": 34.33, + "learning_rate": 4.141801853997683e-05, + "loss": 2.7464, + "step": 118500 + }, + { + "epoch": 34.47, + "learning_rate": 4.1381807647740443e-05, + "loss": 2.842, + "step": 119000 + }, + { + "epoch": 34.62, + "learning_rate": 4.134559675550406e-05, + "loss": 2.7679, + "step": 119500 + }, + { + "epoch": 34.76, + "learning_rate": 4.1309385863267676e-05, + "loss": 2.8504, + "step": 120000 + }, + { + "epoch": 34.91, + "learning_rate": 4.1273174971031285e-05, + "loss": 2.7849, + "step": 120500 + }, + { + "epoch": 35.05, + "learning_rate": 4.12369640787949e-05, + "loss": 2.7947, + "step": 121000 + }, + { + "epoch": 35.2, + "learning_rate": 4.120075318655852e-05, + "loss": 2.6207, + "step": 121500 + }, + { + "epoch": 35.34, + "learning_rate": 4.116454229432213e-05, + "loss": 2.7135, + "step": 122000 + }, + { + "epoch": 35.49, + "learning_rate": 4.112833140208575e-05, + "loss": 2.796, + "step": 122500 + }, + { + "epoch": 35.63, + "learning_rate": 4.1092120509849365e-05, + "loss": 2.7093, + "step": 123000 + }, + { + "epoch": 35.78, + "learning_rate": 4.105590961761298e-05, + "loss": 2.722, + "step": 123500 + }, + { + "epoch": 35.92, + "learning_rate": 4.101969872537659e-05, + "loss": 2.6685, + "step": 124000 + }, + { + "epoch": 36.07, + "learning_rate": 4.0983487833140214e-05, + "loss": 2.8914, + "step": 124500 + }, + { + "epoch": 36.21, + "learning_rate": 4.094727694090383e-05, + "loss": 2.7502, + "step": 125000 + }, + { + "epoch": 36.36, + "learning_rate": 4.091106604866744e-05, + "loss": 2.6001, + "step": 125500 + }, + { + "epoch": 36.5, + "learning_rate": 4.0874855156431055e-05, + "loss": 2.6482, + "step": 126000 + }, + { + "epoch": 36.65, + "learning_rate": 4.083864426419467e-05, + "loss": 2.5456, + "step": 126500 + }, + { + "epoch": 36.79, + "learning_rate": 4.080243337195829e-05, + "loss": 2.7484, + "step": 127000 + }, + { + "epoch": 36.94, + "learning_rate": 4.0766222479721903e-05, + "loss": 2.706, + "step": 127500 + }, + { + "epoch": 37.08, + "learning_rate": 4.073001158748552e-05, + "loss": 2.6221, + "step": 128000 + }, + { + "epoch": 37.22, + "learning_rate": 4.0693800695249136e-05, + "loss": 2.5343, + "step": 128500 + }, + { + "epoch": 37.37, + "learning_rate": 4.0657589803012745e-05, + "loss": 2.5271, + "step": 129000 + }, + { + "epoch": 37.51, + "learning_rate": 4.062137891077636e-05, + "loss": 2.6493, + "step": 129500 + }, + { + "epoch": 37.66, + "learning_rate": 4.058516801853998e-05, + "loss": 2.6699, + "step": 130000 + }, + { + "epoch": 37.8, + "learning_rate": 4.054895712630359e-05, + "loss": 2.6375, + "step": 130500 + }, + { + "epoch": 37.95, + "learning_rate": 4.051274623406721e-05, + "loss": 2.6659, + "step": 131000 + }, + { + "epoch": 38.09, + "learning_rate": 4.0476535341830825e-05, + "loss": 2.6404, + "step": 131500 + }, + { + "epoch": 38.24, + "learning_rate": 4.044032444959444e-05, + "loss": 2.4527, + "step": 132000 + }, + { + "epoch": 38.38, + "learning_rate": 4.040411355735805e-05, + "loss": 2.4812, + "step": 132500 + }, + { + "epoch": 38.53, + "learning_rate": 4.0367902665121674e-05, + "loss": 2.675, + "step": 133000 + }, + { + "epoch": 38.67, + "learning_rate": 4.033169177288529e-05, + "loss": 2.6116, + "step": 133500 + }, + { + "epoch": 38.82, + "learning_rate": 4.02954808806489e-05, + "loss": 2.5089, + "step": 134000 + }, + { + "epoch": 38.96, + "learning_rate": 4.0259269988412515e-05, + "loss": 2.6363, + "step": 134500 + }, + { + "epoch": 39.11, + "learning_rate": 4.022305909617613e-05, + "loss": 2.4868, + "step": 135000 + }, + { + "epoch": 39.25, + "learning_rate": 4.018684820393975e-05, + "loss": 2.4665, + "step": 135500 + }, + { + "epoch": 39.4, + "learning_rate": 4.015063731170336e-05, + "loss": 2.5217, + "step": 136000 + }, + { + "epoch": 39.54, + "learning_rate": 4.011442641946698e-05, + "loss": 2.5457, + "step": 136500 + }, + { + "epoch": 39.69, + "learning_rate": 4.0078215527230596e-05, + "loss": 2.5308, + "step": 137000 + }, + { + "epoch": 39.83, + "learning_rate": 4.0042004634994205e-05, + "loss": 2.5148, + "step": 137500 + }, + { + "epoch": 39.98, + "learning_rate": 4.000579374275782e-05, + "loss": 2.552, + "step": 138000 + }, + { + "epoch": 40.12, + "learning_rate": 3.996958285052144e-05, + "loss": 2.5229, + "step": 138500 + }, + { + "epoch": 40.27, + "learning_rate": 3.993337195828505e-05, + "loss": 2.3691, + "step": 139000 + }, + { + "epoch": 40.41, + "learning_rate": 3.989716106604867e-05, + "loss": 2.4165, + "step": 139500 + }, + { + "epoch": 40.56, + "learning_rate": 3.9860950173812286e-05, + "loss": 2.5134, + "step": 140000 + }, + { + "epoch": 40.7, + "learning_rate": 3.98247392815759e-05, + "loss": 2.4526, + "step": 140500 + }, + { + "epoch": 40.85, + "learning_rate": 3.978852838933951e-05, + "loss": 2.4467, + "step": 141000 + }, + { + "epoch": 40.99, + "learning_rate": 3.9752317497103134e-05, + "loss": 2.4586, + "step": 141500 + }, + { + "epoch": 41.14, + "learning_rate": 3.971610660486675e-05, + "loss": 2.2715, + "step": 142000 + }, + { + "epoch": 41.28, + "learning_rate": 3.967989571263036e-05, + "loss": 2.4122, + "step": 142500 + }, + { + "epoch": 41.43, + "learning_rate": 3.9643684820393975e-05, + "loss": 2.5038, + "step": 143000 + }, + { + "epoch": 41.57, + "learning_rate": 3.960747392815759e-05, + "loss": 2.4213, + "step": 143500 + }, + { + "epoch": 41.71, + "learning_rate": 3.957126303592121e-05, + "loss": 2.3776, + "step": 144000 + }, + { + "epoch": 41.86, + "learning_rate": 3.953505214368482e-05, + "loss": 2.4879, + "step": 144500 + }, + { + "epoch": 42.0, + "learning_rate": 3.949884125144844e-05, + "loss": 2.3793, + "step": 145000 + }, + { + "epoch": 42.15, + "learning_rate": 3.9462630359212056e-05, + "loss": 2.3577, + "step": 145500 + }, + { + "epoch": 42.29, + "learning_rate": 3.9426419466975665e-05, + "loss": 2.2899, + "step": 146000 + }, + { + "epoch": 42.44, + "learning_rate": 3.939020857473928e-05, + "loss": 2.2713, + "step": 146500 + }, + { + "epoch": 42.58, + "learning_rate": 3.93539976825029e-05, + "loss": 2.4175, + "step": 147000 + }, + { + "epoch": 42.73, + "learning_rate": 3.9317786790266513e-05, + "loss": 2.3225, + "step": 147500 + }, + { + "epoch": 42.87, + "learning_rate": 3.928157589803013e-05, + "loss": 2.4637, + "step": 148000 + }, + { + "epoch": 43.02, + "learning_rate": 3.9245365005793746e-05, + "loss": 2.4782, + "step": 148500 + }, + { + "epoch": 43.16, + "learning_rate": 3.920915411355736e-05, + "loss": 2.2441, + "step": 149000 + }, + { + "epoch": 43.31, + "learning_rate": 3.917294322132097e-05, + "loss": 2.3654, + "step": 149500 + }, + { + "epoch": 43.45, + "learning_rate": 3.9136732329084594e-05, + "loss": 2.3439, + "step": 150000 + }, + { + "epoch": 43.6, + "learning_rate": 3.91005214368482e-05, + "loss": 2.1964, + "step": 150500 + }, + { + "epoch": 43.74, + "learning_rate": 3.906431054461182e-05, + "loss": 2.3778, + "step": 151000 + }, + { + "epoch": 43.89, + "learning_rate": 3.9028099652375435e-05, + "loss": 2.3038, + "step": 151500 + }, + { + "epoch": 44.03, + "learning_rate": 3.899188876013905e-05, + "loss": 2.3877, + "step": 152000 + }, + { + "epoch": 44.18, + "learning_rate": 3.895567786790267e-05, + "loss": 2.2923, + "step": 152500 + }, + { + "epoch": 44.32, + "learning_rate": 3.891946697566628e-05, + "loss": 2.1665, + "step": 153000 + }, + { + "epoch": 44.47, + "learning_rate": 3.88832560834299e-05, + "loss": 2.2541, + "step": 153500 + }, + { + "epoch": 44.61, + "learning_rate": 3.8847045191193516e-05, + "loss": 2.3349, + "step": 154000 + }, + { + "epoch": 44.76, + "learning_rate": 3.8810834298957125e-05, + "loss": 2.1701, + "step": 154500 + }, + { + "epoch": 44.9, + "learning_rate": 3.877462340672075e-05, + "loss": 2.3005, + "step": 155000 + }, + { + "epoch": 45.05, + "learning_rate": 3.873841251448436e-05, + "loss": 2.2592, + "step": 155500 + }, + { + "epoch": 45.19, + "learning_rate": 3.8702201622247974e-05, + "loss": 2.1503, + "step": 156000 + }, + { + "epoch": 45.34, + "learning_rate": 3.866599073001159e-05, + "loss": 2.2068, + "step": 156500 + }, + { + "epoch": 45.48, + "learning_rate": 3.8629779837775206e-05, + "loss": 2.2581, + "step": 157000 + }, + { + "epoch": 45.63, + "learning_rate": 3.859356894553882e-05, + "loss": 2.207, + "step": 157500 + }, + { + "epoch": 45.77, + "learning_rate": 3.855735805330243e-05, + "loss": 2.2473, + "step": 158000 + }, + { + "epoch": 45.92, + "learning_rate": 3.8521147161066054e-05, + "loss": 2.3088, + "step": 158500 + }, + { + "epoch": 46.06, + "learning_rate": 3.848493626882966e-05, + "loss": 2.199, + "step": 159000 + }, + { + "epoch": 46.21, + "learning_rate": 3.844872537659328e-05, + "loss": 2.1469, + "step": 159500 + }, + { + "epoch": 46.35, + "learning_rate": 3.8412514484356895e-05, + "loss": 2.2277, + "step": 160000 + }, + { + "epoch": 46.49, + "learning_rate": 3.837630359212051e-05, + "loss": 2.0807, + "step": 160500 + }, + { + "epoch": 46.64, + "learning_rate": 3.834009269988413e-05, + "loss": 2.1421, + "step": 161000 + }, + { + "epoch": 46.78, + "learning_rate": 3.830388180764774e-05, + "loss": 2.0924, + "step": 161500 + }, + { + "epoch": 46.93, + "learning_rate": 3.826767091541136e-05, + "loss": 2.2633, + "step": 162000 + }, + { + "epoch": 47.07, + "learning_rate": 3.8231460023174976e-05, + "loss": 2.1603, + "step": 162500 + }, + { + "epoch": 47.22, + "learning_rate": 3.8195249130938585e-05, + "loss": 2.1038, + "step": 163000 + }, + { + "epoch": 47.36, + "learning_rate": 3.815903823870221e-05, + "loss": 2.081, + "step": 163500 + }, + { + "epoch": 47.51, + "learning_rate": 3.812282734646582e-05, + "loss": 2.0259, + "step": 164000 + }, + { + "epoch": 47.65, + "learning_rate": 3.8086616454229434e-05, + "loss": 2.0775, + "step": 164500 + }, + { + "epoch": 47.8, + "learning_rate": 3.805040556199305e-05, + "loss": 2.119, + "step": 165000 + }, + { + "epoch": 47.94, + "learning_rate": 3.8014194669756666e-05, + "loss": 2.0324, + "step": 165500 + }, + { + "epoch": 48.09, + "learning_rate": 3.797798377752028e-05, + "loss": 2.0463, + "step": 166000 + }, + { + "epoch": 48.23, + "learning_rate": 3.794177288528389e-05, + "loss": 1.9982, + "step": 166500 + }, + { + "epoch": 48.38, + "learning_rate": 3.7905561993047514e-05, + "loss": 2.1069, + "step": 167000 + }, + { + "epoch": 48.52, + "learning_rate": 3.786935110081112e-05, + "loss": 2.0855, + "step": 167500 + }, + { + "epoch": 48.67, + "learning_rate": 3.783314020857474e-05, + "loss": 2.0103, + "step": 168000 + }, + { + "epoch": 48.81, + "learning_rate": 3.7796929316338356e-05, + "loss": 2.1315, + "step": 168500 + }, + { + "epoch": 48.96, + "learning_rate": 3.776071842410197e-05, + "loss": 2.2163, + "step": 169000 + }, + { + "epoch": 49.1, + "learning_rate": 3.772450753186559e-05, + "loss": 2.0169, + "step": 169500 + }, + { + "epoch": 49.25, + "learning_rate": 3.76882966396292e-05, + "loss": 1.9853, + "step": 170000 + }, + { + "epoch": 49.39, + "learning_rate": 3.765208574739282e-05, + "loss": 2.0007, + "step": 170500 + }, + { + "epoch": 49.54, + "learning_rate": 3.7615874855156436e-05, + "loss": 1.9034, + "step": 171000 + }, + { + "epoch": 49.68, + "learning_rate": 3.7579663962920045e-05, + "loss": 2.0278, + "step": 171500 + }, + { + "epoch": 49.83, + "learning_rate": 3.754345307068367e-05, + "loss": 2.0554, + "step": 172000 + }, + { + "epoch": 49.97, + "learning_rate": 3.750724217844728e-05, + "loss": 2.0944, + "step": 172500 + }, + { + "epoch": 50.12, + "learning_rate": 3.7471031286210894e-05, + "loss": 1.9666, + "step": 173000 + }, + { + "epoch": 50.26, + "learning_rate": 3.743482039397451e-05, + "loss": 1.9237, + "step": 173500 + }, + { + "epoch": 50.41, + "learning_rate": 3.7398609501738126e-05, + "loss": 1.9957, + "step": 174000 + }, + { + "epoch": 50.55, + "learning_rate": 3.736239860950174e-05, + "loss": 1.9975, + "step": 174500 + }, + { + "epoch": 50.7, + "learning_rate": 3.732618771726535e-05, + "loss": 1.923, + "step": 175000 + }, + { + "epoch": 50.84, + "learning_rate": 3.7289976825028974e-05, + "loss": 2.0131, + "step": 175500 + }, + { + "epoch": 50.98, + "learning_rate": 3.7253765932792583e-05, + "loss": 2.0053, + "step": 176000 + }, + { + "epoch": 51.13, + "learning_rate": 3.72175550405562e-05, + "loss": 1.9155, + "step": 176500 + }, + { + "epoch": 51.27, + "learning_rate": 3.7181344148319816e-05, + "loss": 1.8302, + "step": 177000 + }, + { + "epoch": 51.42, + "learning_rate": 3.714513325608343e-05, + "loss": 1.8982, + "step": 177500 + }, + { + "epoch": 51.56, + "learning_rate": 3.710892236384705e-05, + "loss": 1.9291, + "step": 178000 + }, + { + "epoch": 51.71, + "learning_rate": 3.707271147161066e-05, + "loss": 1.9618, + "step": 178500 + }, + { + "epoch": 51.85, + "learning_rate": 3.703650057937428e-05, + "loss": 1.906, + "step": 179000 + }, + { + "epoch": 52.0, + "learning_rate": 3.700028968713789e-05, + "loss": 1.9577, + "step": 179500 + }, + { + "epoch": 52.14, + "learning_rate": 3.6964078794901505e-05, + "loss": 1.9181, + "step": 180000 + }, + { + "epoch": 52.29, + "learning_rate": 3.692786790266513e-05, + "loss": 1.8794, + "step": 180500 + }, + { + "epoch": 52.43, + "learning_rate": 3.689165701042874e-05, + "loss": 1.8926, + "step": 181000 + }, + { + "epoch": 52.58, + "learning_rate": 3.6855446118192354e-05, + "loss": 1.9498, + "step": 181500 + }, + { + "epoch": 52.72, + "learning_rate": 3.681923522595597e-05, + "loss": 1.8301, + "step": 182000 + }, + { + "epoch": 52.87, + "learning_rate": 3.6783024333719586e-05, + "loss": 1.8718, + "step": 182500 + }, + { + "epoch": 53.01, + "learning_rate": 3.67468134414832e-05, + "loss": 1.9002, + "step": 183000 + }, + { + "epoch": 53.16, + "learning_rate": 3.671060254924681e-05, + "loss": 1.7962, + "step": 183500 + }, + { + "epoch": 53.3, + "learning_rate": 3.6674391657010434e-05, + "loss": 1.7957, + "step": 184000 + }, + { + "epoch": 53.45, + "learning_rate": 3.6638180764774044e-05, + "loss": 1.8693, + "step": 184500 + }, + { + "epoch": 53.59, + "learning_rate": 3.660196987253766e-05, + "loss": 1.8591, + "step": 185000 + }, + { + "epoch": 53.74, + "learning_rate": 3.6565758980301276e-05, + "loss": 1.8654, + "step": 185500 + }, + { + "epoch": 53.88, + "learning_rate": 3.652954808806489e-05, + "loss": 1.8288, + "step": 186000 + }, + { + "epoch": 54.03, + "learning_rate": 3.649333719582851e-05, + "loss": 1.9124, + "step": 186500 + }, + { + "epoch": 54.17, + "learning_rate": 3.6457126303592124e-05, + "loss": 1.7429, + "step": 187000 + }, + { + "epoch": 54.32, + "learning_rate": 3.642091541135574e-05, + "loss": 1.8316, + "step": 187500 + }, + { + "epoch": 54.46, + "learning_rate": 3.638470451911935e-05, + "loss": 1.7558, + "step": 188000 + }, + { + "epoch": 54.61, + "learning_rate": 3.6348493626882966e-05, + "loss": 1.8358, + "step": 188500 + }, + { + "epoch": 54.75, + "learning_rate": 3.631228273464659e-05, + "loss": 1.7743, + "step": 189000 + }, + { + "epoch": 54.9, + "learning_rate": 3.62760718424102e-05, + "loss": 1.8545, + "step": 189500 + }, + { + "epoch": 55.04, + "learning_rate": 3.6239860950173814e-05, + "loss": 1.7785, + "step": 190000 + }, + { + "epoch": 55.19, + "learning_rate": 3.620365005793743e-05, + "loss": 1.764, + "step": 190500 + }, + { + "epoch": 55.33, + "learning_rate": 3.6167439165701046e-05, + "loss": 1.8189, + "step": 191000 + }, + { + "epoch": 55.48, + "learning_rate": 3.613122827346466e-05, + "loss": 1.6939, + "step": 191500 + }, + { + "epoch": 55.62, + "learning_rate": 3.609501738122827e-05, + "loss": 1.7771, + "step": 192000 + }, + { + "epoch": 55.76, + "learning_rate": 3.6058806488991894e-05, + "loss": 1.7576, + "step": 192500 + }, + { + "epoch": 55.91, + "learning_rate": 3.6022595596755504e-05, + "loss": 1.8118, + "step": 193000 + }, + { + "epoch": 56.05, + "learning_rate": 3.598638470451912e-05, + "loss": 1.7491, + "step": 193500 + }, + { + "epoch": 56.2, + "learning_rate": 3.5950173812282736e-05, + "loss": 1.7124, + "step": 194000 + }, + { + "epoch": 56.34, + "learning_rate": 3.591396292004635e-05, + "loss": 1.7616, + "step": 194500 + }, + { + "epoch": 56.49, + "learning_rate": 3.587775202780997e-05, + "loss": 1.7315, + "step": 195000 + }, + { + "epoch": 56.63, + "learning_rate": 3.5841541135573584e-05, + "loss": 1.6236, + "step": 195500 + }, + { + "epoch": 56.78, + "learning_rate": 3.58053302433372e-05, + "loss": 1.7692, + "step": 196000 + }, + { + "epoch": 56.92, + "learning_rate": 3.576911935110081e-05, + "loss": 1.6878, + "step": 196500 + }, + { + "epoch": 57.07, + "learning_rate": 3.5732908458864426e-05, + "loss": 1.6287, + "step": 197000 + }, + { + "epoch": 57.21, + "learning_rate": 3.569669756662805e-05, + "loss": 1.6295, + "step": 197500 + }, + { + "epoch": 57.36, + "learning_rate": 3.566048667439166e-05, + "loss": 1.712, + "step": 198000 + }, + { + "epoch": 57.5, + "learning_rate": 3.5624275782155274e-05, + "loss": 1.6799, + "step": 198500 + }, + { + "epoch": 57.65, + "learning_rate": 3.558806488991889e-05, + "loss": 1.7199, + "step": 199000 + }, + { + "epoch": 57.79, + "learning_rate": 3.5551853997682506e-05, + "loss": 1.6429, + "step": 199500 + }, + { + "epoch": 57.94, + "learning_rate": 3.551564310544612e-05, + "loss": 1.7739, + "step": 200000 + }, + { + "epoch": 58.08, + "learning_rate": 3.547943221320973e-05, + "loss": 1.5893, + "step": 200500 + }, + { + "epoch": 58.23, + "learning_rate": 3.5443221320973354e-05, + "loss": 1.7382, + "step": 201000 + }, + { + "epoch": 58.37, + "learning_rate": 3.5407010428736964e-05, + "loss": 1.587, + "step": 201500 + }, + { + "epoch": 58.52, + "learning_rate": 3.537079953650058e-05, + "loss": 1.6118, + "step": 202000 + }, + { + "epoch": 58.66, + "learning_rate": 3.5334588644264196e-05, + "loss": 1.5871, + "step": 202500 + }, + { + "epoch": 58.81, + "learning_rate": 3.529837775202781e-05, + "loss": 1.5877, + "step": 203000 + }, + { + "epoch": 58.95, + "learning_rate": 3.526216685979143e-05, + "loss": 1.6959, + "step": 203500 + }, + { + "epoch": 59.1, + "learning_rate": 3.5225955967555044e-05, + "loss": 1.6962, + "step": 204000 + }, + { + "epoch": 59.24, + "learning_rate": 3.518974507531866e-05, + "loss": 1.6245, + "step": 204500 + }, + { + "epoch": 59.39, + "learning_rate": 3.515353418308227e-05, + "loss": 1.6294, + "step": 205000 + }, + { + "epoch": 59.53, + "learning_rate": 3.5117323290845886e-05, + "loss": 1.6275, + "step": 205500 + }, + { + "epoch": 59.68, + "learning_rate": 3.508111239860951e-05, + "loss": 1.6681, + "step": 206000 + }, + { + "epoch": 59.82, + "learning_rate": 3.504490150637312e-05, + "loss": 1.5596, + "step": 206500 + }, + { + "epoch": 59.97, + "learning_rate": 3.5008690614136734e-05, + "loss": 1.629, + "step": 207000 + }, + { + "epoch": 60.11, + "learning_rate": 3.497247972190035e-05, + "loss": 1.6084, + "step": 207500 + }, + { + "epoch": 60.25, + "learning_rate": 3.4936268829663966e-05, + "loss": 1.5486, + "step": 208000 + }, + { + "epoch": 60.4, + "learning_rate": 3.4900057937427575e-05, + "loss": 1.5647, + "step": 208500 + }, + { + "epoch": 60.54, + "learning_rate": 3.486384704519119e-05, + "loss": 1.5691, + "step": 209000 + }, + { + "epoch": 60.69, + "learning_rate": 3.4827636152954814e-05, + "loss": 1.5658, + "step": 209500 + }, + { + "epoch": 60.83, + "learning_rate": 3.4791425260718424e-05, + "loss": 1.5257, + "step": 210000 + }, + { + "epoch": 60.98, + "learning_rate": 3.475521436848204e-05, + "loss": 1.5903, + "step": 210500 + }, + { + "epoch": 61.12, + "learning_rate": 3.4719003476245656e-05, + "loss": 1.514, + "step": 211000 + }, + { + "epoch": 61.27, + "learning_rate": 3.468279258400927e-05, + "loss": 1.4983, + "step": 211500 + }, + { + "epoch": 61.41, + "learning_rate": 3.464658169177289e-05, + "loss": 1.5336, + "step": 212000 + }, + { + "epoch": 61.56, + "learning_rate": 3.4610370799536504e-05, + "loss": 1.5524, + "step": 212500 + }, + { + "epoch": 61.7, + "learning_rate": 3.457415990730012e-05, + "loss": 1.5885, + "step": 213000 + }, + { + "epoch": 61.85, + "learning_rate": 3.453794901506373e-05, + "loss": 1.5389, + "step": 213500 + }, + { + "epoch": 61.99, + "learning_rate": 3.4501738122827346e-05, + "loss": 1.5126, + "step": 214000 + }, + { + "epoch": 62.14, + "learning_rate": 3.446552723059097e-05, + "loss": 1.4495, + "step": 214500 + }, + { + "epoch": 62.28, + "learning_rate": 3.442931633835458e-05, + "loss": 1.4506, + "step": 215000 + }, + { + "epoch": 62.43, + "learning_rate": 3.4393105446118194e-05, + "loss": 1.4459, + "step": 215500 + }, + { + "epoch": 62.57, + "learning_rate": 3.435689455388181e-05, + "loss": 1.5571, + "step": 216000 + }, + { + "epoch": 62.72, + "learning_rate": 3.4320683661645426e-05, + "loss": 1.5158, + "step": 216500 + }, + { + "epoch": 62.86, + "learning_rate": 3.4284472769409036e-05, + "loss": 1.5493, + "step": 217000 + }, + { + "epoch": 63.01, + "learning_rate": 3.424826187717265e-05, + "loss": 1.5083, + "step": 217500 + }, + { + "epoch": 63.15, + "learning_rate": 3.4212050984936275e-05, + "loss": 1.4297, + "step": 218000 + }, + { + "epoch": 63.3, + "learning_rate": 3.4175840092699884e-05, + "loss": 1.4847, + "step": 218500 + }, + { + "epoch": 63.44, + "learning_rate": 3.41396292004635e-05, + "loss": 1.446, + "step": 219000 + }, + { + "epoch": 63.59, + "learning_rate": 3.4103418308227116e-05, + "loss": 1.4292, + "step": 219500 + }, + { + "epoch": 63.73, + "learning_rate": 3.406720741599073e-05, + "loss": 1.5258, + "step": 220000 + }, + { + "epoch": 63.88, + "learning_rate": 3.403099652375435e-05, + "loss": 1.4968, + "step": 220500 + }, + { + "epoch": 64.02, + "learning_rate": 3.3994785631517964e-05, + "loss": 1.4877, + "step": 221000 + }, + { + "epoch": 64.17, + "learning_rate": 3.395857473928158e-05, + "loss": 1.4667, + "step": 221500 + }, + { + "epoch": 64.31, + "learning_rate": 3.392236384704519e-05, + "loss": 1.4413, + "step": 222000 + }, + { + "epoch": 64.46, + "learning_rate": 3.3886152954808806e-05, + "loss": 1.4228, + "step": 222500 + }, + { + "epoch": 64.6, + "learning_rate": 3.384994206257243e-05, + "loss": 1.4158, + "step": 223000 + }, + { + "epoch": 64.75, + "learning_rate": 3.381373117033604e-05, + "loss": 1.376, + "step": 223500 + }, + { + "epoch": 64.89, + "learning_rate": 3.3777520278099654e-05, + "loss": 1.4359, + "step": 224000 + }, + { + "epoch": 65.03, + "learning_rate": 3.374130938586327e-05, + "loss": 1.4994, + "step": 224500 + }, + { + "epoch": 65.18, + "learning_rate": 3.3705098493626886e-05, + "loss": 1.3646, + "step": 225000 + }, + { + "epoch": 65.32, + "learning_rate": 3.3668887601390496e-05, + "loss": 1.5025, + "step": 225500 + }, + { + "epoch": 65.47, + "learning_rate": 3.363267670915411e-05, + "loss": 1.4077, + "step": 226000 + }, + { + "epoch": 65.61, + "learning_rate": 3.3596465816917735e-05, + "loss": 1.3323, + "step": 226500 + }, + { + "epoch": 65.76, + "learning_rate": 3.3560254924681344e-05, + "loss": 1.4387, + "step": 227000 + }, + { + "epoch": 65.9, + "learning_rate": 3.352404403244496e-05, + "loss": 1.3418, + "step": 227500 + }, + { + "epoch": 66.05, + "learning_rate": 3.3487833140208576e-05, + "loss": 1.2793, + "step": 228000 + }, + { + "epoch": 66.19, + "learning_rate": 3.345162224797219e-05, + "loss": 1.3484, + "step": 228500 + }, + { + "epoch": 66.34, + "learning_rate": 3.341541135573581e-05, + "loss": 1.3419, + "step": 229000 + }, + { + "epoch": 66.48, + "learning_rate": 3.3379200463499424e-05, + "loss": 1.308, + "step": 229500 + }, + { + "epoch": 66.63, + "learning_rate": 3.334298957126304e-05, + "loss": 1.3193, + "step": 230000 + }, + { + "epoch": 66.77, + "learning_rate": 3.330677867902665e-05, + "loss": 1.4064, + "step": 230500 + }, + { + "epoch": 66.92, + "learning_rate": 3.3270567786790266e-05, + "loss": 1.3766, + "step": 231000 + }, + { + "epoch": 67.06, + "learning_rate": 3.323435689455388e-05, + "loss": 1.3482, + "step": 231500 + }, + { + "epoch": 67.21, + "learning_rate": 3.31981460023175e-05, + "loss": 1.348, + "step": 232000 + }, + { + "epoch": 67.35, + "learning_rate": 3.3161935110081114e-05, + "loss": 1.2824, + "step": 232500 + }, + { + "epoch": 67.5, + "learning_rate": 3.312572421784473e-05, + "loss": 1.2908, + "step": 233000 + }, + { + "epoch": 67.64, + "learning_rate": 3.3089513325608346e-05, + "loss": 1.4077, + "step": 233500 + }, + { + "epoch": 67.79, + "learning_rate": 3.3053302433371956e-05, + "loss": 1.3023, + "step": 234000 + }, + { + "epoch": 67.93, + "learning_rate": 3.301709154113558e-05, + "loss": 1.422, + "step": 234500 + }, + { + "epoch": 68.08, + "learning_rate": 3.2980880648899195e-05, + "loss": 1.3004, + "step": 235000 + }, + { + "epoch": 68.22, + "learning_rate": 3.2944669756662804e-05, + "loss": 1.2346, + "step": 235500 + }, + { + "epoch": 68.37, + "learning_rate": 3.290845886442642e-05, + "loss": 1.2779, + "step": 236000 + }, + { + "epoch": 68.51, + "learning_rate": 3.2872247972190036e-05, + "loss": 1.2945, + "step": 236500 + }, + { + "epoch": 68.66, + "learning_rate": 3.283603707995365e-05, + "loss": 1.2413, + "step": 237000 + }, + { + "epoch": 68.8, + "learning_rate": 3.279982618771727e-05, + "loss": 1.3038, + "step": 237500 + }, + { + "epoch": 68.95, + "learning_rate": 3.2763615295480884e-05, + "loss": 1.3092, + "step": 238000 + }, + { + "epoch": 69.09, + "learning_rate": 3.27274044032445e-05, + "loss": 1.2456, + "step": 238500 + }, + { + "epoch": 69.24, + "learning_rate": 3.269119351100811e-05, + "loss": 1.2915, + "step": 239000 + }, + { + "epoch": 69.38, + "learning_rate": 3.2654982618771726e-05, + "loss": 1.2733, + "step": 239500 + }, + { + "epoch": 69.52, + "learning_rate": 3.261877172653534e-05, + "loss": 1.2595, + "step": 240000 + }, + { + "epoch": 69.67, + "learning_rate": 3.258256083429896e-05, + "loss": 1.2344, + "step": 240500 + }, + { + "epoch": 69.81, + "learning_rate": 3.2546349942062574e-05, + "loss": 1.2483, + "step": 241000 + }, + { + "epoch": 69.96, + "learning_rate": 3.251013904982619e-05, + "loss": 1.3016, + "step": 241500 + }, + { + "epoch": 70.1, + "learning_rate": 3.2473928157589806e-05, + "loss": 1.1828, + "step": 242000 + }, + { + "epoch": 70.25, + "learning_rate": 3.2437717265353416e-05, + "loss": 1.2399, + "step": 242500 + }, + { + "epoch": 70.39, + "learning_rate": 3.240150637311704e-05, + "loss": 1.2375, + "step": 243000 + }, + { + "epoch": 70.54, + "learning_rate": 3.2365295480880655e-05, + "loss": 1.2156, + "step": 243500 + }, + { + "epoch": 70.68, + "learning_rate": 3.2329084588644264e-05, + "loss": 1.2149, + "step": 244000 + }, + { + "epoch": 70.83, + "learning_rate": 3.229287369640788e-05, + "loss": 1.2616, + "step": 244500 + }, + { + "epoch": 70.97, + "learning_rate": 3.2256662804171496e-05, + "loss": 1.2711, + "step": 245000 + }, + { + "epoch": 71.12, + "learning_rate": 3.222045191193511e-05, + "loss": 1.1306, + "step": 245500 + }, + { + "epoch": 71.26, + "learning_rate": 3.218424101969872e-05, + "loss": 1.1036, + "step": 246000 + }, + { + "epoch": 71.41, + "learning_rate": 3.2148030127462345e-05, + "loss": 1.1606, + "step": 246500 + }, + { + "epoch": 71.55, + "learning_rate": 3.211181923522596e-05, + "loss": 1.2608, + "step": 247000 + }, + { + "epoch": 71.7, + "learning_rate": 3.207560834298957e-05, + "loss": 1.2569, + "step": 247500 + }, + { + "epoch": 71.84, + "learning_rate": 3.2039397450753186e-05, + "loss": 1.2382, + "step": 248000 + }, + { + "epoch": 71.99, + "learning_rate": 3.20031865585168e-05, + "loss": 1.1877, + "step": 248500 + }, + { + "epoch": 72.13, + "learning_rate": 3.196697566628042e-05, + "loss": 1.2093, + "step": 249000 + }, + { + "epoch": 72.28, + "learning_rate": 3.1930764774044034e-05, + "loss": 1.133, + "step": 249500 + }, + { + "epoch": 72.42, + "learning_rate": 3.189455388180765e-05, + "loss": 1.1242, + "step": 250000 + }, + { + "epoch": 72.57, + "learning_rate": 3.1858342989571267e-05, + "loss": 1.1529, + "step": 250500 + }, + { + "epoch": 72.71, + "learning_rate": 3.1822132097334876e-05, + "loss": 1.1561, + "step": 251000 + }, + { + "epoch": 72.86, + "learning_rate": 3.17859212050985e-05, + "loss": 1.1941, + "step": 251500 + }, + { + "epoch": 73.0, + "learning_rate": 3.1749710312862115e-05, + "loss": 1.2591, + "step": 252000 + }, + { + "epoch": 73.15, + "learning_rate": 3.1713499420625724e-05, + "loss": 1.0791, + "step": 252500 + }, + { + "epoch": 73.29, + "learning_rate": 3.167728852838934e-05, + "loss": 1.1471, + "step": 253000 + }, + { + "epoch": 73.44, + "learning_rate": 3.1641077636152956e-05, + "loss": 1.0509, + "step": 253500 + }, + { + "epoch": 73.58, + "learning_rate": 3.160486674391657e-05, + "loss": 1.1248, + "step": 254000 + }, + { + "epoch": 73.73, + "learning_rate": 3.156865585168018e-05, + "loss": 1.1184, + "step": 254500 + }, + { + "epoch": 73.87, + "learning_rate": 3.1532444959443805e-05, + "loss": 1.2078, + "step": 255000 + }, + { + "epoch": 74.02, + "learning_rate": 3.149623406720742e-05, + "loss": 1.1616, + "step": 255500 + }, + { + "epoch": 74.16, + "learning_rate": 3.146002317497103e-05, + "loss": 1.0971, + "step": 256000 + }, + { + "epoch": 74.3, + "learning_rate": 3.1423812282734646e-05, + "loss": 1.1006, + "step": 256500 + }, + { + "epoch": 74.45, + "learning_rate": 3.138760139049826e-05, + "loss": 1.1215, + "step": 257000 + }, + { + "epoch": 74.59, + "learning_rate": 3.135139049826188e-05, + "loss": 1.1527, + "step": 257500 + }, + { + "epoch": 74.74, + "learning_rate": 3.1315179606025494e-05, + "loss": 1.1213, + "step": 258000 + }, + { + "epoch": 74.88, + "learning_rate": 3.127896871378911e-05, + "loss": 1.1626, + "step": 258500 + }, + { + "epoch": 75.03, + "learning_rate": 3.1242757821552727e-05, + "loss": 1.0862, + "step": 259000 + }, + { + "epoch": 75.17, + "learning_rate": 3.1206546929316336e-05, + "loss": 1.0992, + "step": 259500 + }, + { + "epoch": 75.32, + "learning_rate": 3.117033603707996e-05, + "loss": 1.0891, + "step": 260000 + }, + { + "epoch": 75.46, + "learning_rate": 3.113412514484357e-05, + "loss": 1.0892, + "step": 260500 + }, + { + "epoch": 75.61, + "learning_rate": 3.1097914252607184e-05, + "loss": 1.0949, + "step": 261000 + }, + { + "epoch": 75.75, + "learning_rate": 3.10617033603708e-05, + "loss": 1.0752, + "step": 261500 + }, + { + "epoch": 75.9, + "learning_rate": 3.1025492468134416e-05, + "loss": 1.1233, + "step": 262000 + }, + { + "epoch": 76.04, + "learning_rate": 3.098928157589803e-05, + "loss": 1.1272, + "step": 262500 + }, + { + "epoch": 76.19, + "learning_rate": 3.095307068366164e-05, + "loss": 1.064, + "step": 263000 + }, + { + "epoch": 76.33, + "learning_rate": 3.0916859791425265e-05, + "loss": 1.0854, + "step": 263500 + }, + { + "epoch": 76.48, + "learning_rate": 3.088064889918888e-05, + "loss": 1.0471, + "step": 264000 + }, + { + "epoch": 76.62, + "learning_rate": 3.084443800695249e-05, + "loss": 1.0993, + "step": 264500 + }, + { + "epoch": 76.77, + "learning_rate": 3.0808227114716106e-05, + "loss": 1.1075, + "step": 265000 + }, + { + "epoch": 76.91, + "learning_rate": 3.077201622247972e-05, + "loss": 1.0724, + "step": 265500 + }, + { + "epoch": 77.06, + "learning_rate": 3.073580533024334e-05, + "loss": 1.0793, + "step": 266000 + }, + { + "epoch": 77.2, + "learning_rate": 3.0699594438006954e-05, + "loss": 1.0535, + "step": 266500 + }, + { + "epoch": 77.35, + "learning_rate": 3.066338354577057e-05, + "loss": 0.9771, + "step": 267000 + }, + { + "epoch": 77.49, + "learning_rate": 3.062717265353419e-05, + "loss": 1.0115, + "step": 267500 + }, + { + "epoch": 77.64, + "learning_rate": 3.0590961761297796e-05, + "loss": 1.0827, + "step": 268000 + }, + { + "epoch": 77.78, + "learning_rate": 3.055475086906142e-05, + "loss": 1.0376, + "step": 268500 + }, + { + "epoch": 77.93, + "learning_rate": 3.051853997682503e-05, + "loss": 1.0339, + "step": 269000 + }, + { + "epoch": 78.07, + "learning_rate": 3.0482329084588644e-05, + "loss": 1.0429, + "step": 269500 + }, + { + "epoch": 78.22, + "learning_rate": 3.0446118192352264e-05, + "loss": 1.0557, + "step": 270000 + }, + { + "epoch": 78.36, + "learning_rate": 3.0409907300115876e-05, + "loss": 0.9607, + "step": 270500 + }, + { + "epoch": 78.51, + "learning_rate": 3.0373696407879493e-05, + "loss": 1.0016, + "step": 271000 + }, + { + "epoch": 78.65, + "learning_rate": 3.0337485515643105e-05, + "loss": 1.0083, + "step": 271500 + }, + { + "epoch": 78.79, + "learning_rate": 3.030127462340672e-05, + "loss": 1.0785, + "step": 272000 + }, + { + "epoch": 78.94, + "learning_rate": 3.026506373117034e-05, + "loss": 1.0867, + "step": 272500 + }, + { + "epoch": 79.08, + "learning_rate": 3.0228852838933954e-05, + "loss": 0.9776, + "step": 273000 + }, + { + "epoch": 79.23, + "learning_rate": 3.019264194669757e-05, + "loss": 0.9559, + "step": 273500 + }, + { + "epoch": 79.37, + "learning_rate": 3.0156431054461182e-05, + "loss": 0.9912, + "step": 274000 + }, + { + "epoch": 79.52, + "learning_rate": 3.01202201622248e-05, + "loss": 1.0311, + "step": 274500 + }, + { + "epoch": 79.66, + "learning_rate": 3.008400926998841e-05, + "loss": 1.0281, + "step": 275000 + }, + { + "epoch": 79.81, + "learning_rate": 3.0047798377752027e-05, + "loss": 0.9119, + "step": 275500 + }, + { + "epoch": 79.95, + "learning_rate": 3.0011587485515647e-05, + "loss": 1.0614, + "step": 276000 + }, + { + "epoch": 80.1, + "learning_rate": 2.997537659327926e-05, + "loss": 1.0004, + "step": 276500 + }, + { + "epoch": 80.24, + "learning_rate": 2.9939165701042876e-05, + "loss": 0.9568, + "step": 277000 + }, + { + "epoch": 80.39, + "learning_rate": 2.9902954808806488e-05, + "loss": 0.9485, + "step": 277500 + }, + { + "epoch": 80.53, + "learning_rate": 2.9866743916570104e-05, + "loss": 0.9932, + "step": 278000 + }, + { + "epoch": 80.68, + "learning_rate": 2.9830533024333724e-05, + "loss": 0.9095, + "step": 278500 + }, + { + "epoch": 80.82, + "learning_rate": 2.9794322132097337e-05, + "loss": 0.929, + "step": 279000 + }, + { + "epoch": 80.97, + "learning_rate": 2.9758111239860953e-05, + "loss": 0.9774, + "step": 279500 + }, + { + "epoch": 81.11, + "learning_rate": 2.9721900347624565e-05, + "loss": 0.9507, + "step": 280000 + }, + { + "epoch": 81.26, + "learning_rate": 2.968568945538818e-05, + "loss": 0.9107, + "step": 280500 + }, + { + "epoch": 81.4, + "learning_rate": 2.96494785631518e-05, + "loss": 0.9239, + "step": 281000 + }, + { + "epoch": 81.55, + "learning_rate": 2.9613267670915414e-05, + "loss": 0.9795, + "step": 281500 + }, + { + "epoch": 81.69, + "learning_rate": 2.957705677867903e-05, + "loss": 0.9762, + "step": 282000 + }, + { + "epoch": 81.84, + "learning_rate": 2.9540845886442642e-05, + "loss": 0.9214, + "step": 282500 + }, + { + "epoch": 81.98, + "learning_rate": 2.950463499420626e-05, + "loss": 1.0133, + "step": 283000 + }, + { + "epoch": 82.13, + "learning_rate": 2.946842410196987e-05, + "loss": 0.9045, + "step": 283500 + }, + { + "epoch": 82.27, + "learning_rate": 2.943221320973349e-05, + "loss": 0.91, + "step": 284000 + }, + { + "epoch": 82.42, + "learning_rate": 2.9396002317497107e-05, + "loss": 0.9366, + "step": 284500 + }, + { + "epoch": 82.56, + "learning_rate": 2.935979142526072e-05, + "loss": 0.8921, + "step": 285000 + }, + { + "epoch": 82.71, + "learning_rate": 2.9323580533024336e-05, + "loss": 0.9532, + "step": 285500 + }, + { + "epoch": 82.85, + "learning_rate": 2.928736964078795e-05, + "loss": 0.9885, + "step": 286000 + }, + { + "epoch": 83.0, + "learning_rate": 2.9251158748551564e-05, + "loss": 0.953, + "step": 286500 + }, + { + "epoch": 83.14, + "learning_rate": 2.9214947856315184e-05, + "loss": 0.8898, + "step": 287000 + }, + { + "epoch": 83.29, + "learning_rate": 2.9178736964078797e-05, + "loss": 0.8683, + "step": 287500 + }, + { + "epoch": 83.43, + "learning_rate": 2.9142526071842413e-05, + "loss": 0.8697, + "step": 288000 + }, + { + "epoch": 83.57, + "learning_rate": 2.9106315179606025e-05, + "loss": 0.9246, + "step": 288500 + }, + { + "epoch": 83.72, + "learning_rate": 2.907010428736964e-05, + "loss": 0.904, + "step": 289000 + }, + { + "epoch": 83.86, + "learning_rate": 2.9033893395133254e-05, + "loss": 0.8879, + "step": 289500 + }, + { + "epoch": 84.01, + "learning_rate": 2.8997682502896874e-05, + "loss": 0.9338, + "step": 290000 + }, + { + "epoch": 84.15, + "learning_rate": 2.896147161066049e-05, + "loss": 0.8704, + "step": 290500 + }, + { + "epoch": 84.3, + "learning_rate": 2.8925260718424102e-05, + "loss": 0.8463, + "step": 291000 + }, + { + "epoch": 84.44, + "learning_rate": 2.888904982618772e-05, + "loss": 0.8428, + "step": 291500 + }, + { + "epoch": 84.59, + "learning_rate": 2.885283893395133e-05, + "loss": 0.9357, + "step": 292000 + }, + { + "epoch": 84.73, + "learning_rate": 2.881662804171495e-05, + "loss": 0.8905, + "step": 292500 + }, + { + "epoch": 84.88, + "learning_rate": 2.8780417149478567e-05, + "loss": 0.9016, + "step": 293000 + }, + { + "epoch": 85.02, + "learning_rate": 2.874420625724218e-05, + "loss": 0.9352, + "step": 293500 + }, + { + "epoch": 85.17, + "learning_rate": 2.8707995365005796e-05, + "loss": 0.8342, + "step": 294000 + }, + { + "epoch": 85.31, + "learning_rate": 2.867178447276941e-05, + "loss": 0.8075, + "step": 294500 + }, + { + "epoch": 85.46, + "learning_rate": 2.8635573580533024e-05, + "loss": 0.8817, + "step": 295000 + }, + { + "epoch": 85.6, + "learning_rate": 2.8599362688296644e-05, + "loss": 0.8684, + "step": 295500 + }, + { + "epoch": 85.75, + "learning_rate": 2.8563151796060257e-05, + "loss": 0.78, + "step": 296000 + }, + { + "epoch": 85.89, + "learning_rate": 2.8526940903823873e-05, + "loss": 0.9032, + "step": 296500 + }, + { + "epoch": 86.04, + "learning_rate": 2.8490730011587485e-05, + "loss": 0.8435, + "step": 297000 + }, + { + "epoch": 86.18, + "learning_rate": 2.84545191193511e-05, + "loss": 0.8357, + "step": 297500 + }, + { + "epoch": 86.33, + "learning_rate": 2.8418308227114714e-05, + "loss": 0.8398, + "step": 298000 + }, + { + "epoch": 86.47, + "learning_rate": 2.8382097334878334e-05, + "loss": 0.8226, + "step": 298500 + }, + { + "epoch": 86.62, + "learning_rate": 2.834588644264195e-05, + "loss": 0.8855, + "step": 299000 + }, + { + "epoch": 86.76, + "learning_rate": 2.8309675550405563e-05, + "loss": 0.8002, + "step": 299500 + }, + { + "epoch": 86.91, + "learning_rate": 2.827346465816918e-05, + "loss": 0.8452, + "step": 300000 + }, + { + "epoch": 87.05, + "learning_rate": 2.823725376593279e-05, + "loss": 0.917, + "step": 300500 + }, + { + "epoch": 87.2, + "learning_rate": 2.820104287369641e-05, + "loss": 0.8106, + "step": 301000 + }, + { + "epoch": 87.34, + "learning_rate": 2.8164831981460027e-05, + "loss": 0.8121, + "step": 301500 + }, + { + "epoch": 87.49, + "learning_rate": 2.812862108922364e-05, + "loss": 0.8031, + "step": 302000 + }, + { + "epoch": 87.63, + "learning_rate": 2.8092410196987256e-05, + "loss": 0.7967, + "step": 302500 + }, + { + "epoch": 87.78, + "learning_rate": 2.805619930475087e-05, + "loss": 0.8531, + "step": 303000 + }, + { + "epoch": 87.92, + "learning_rate": 2.8019988412514488e-05, + "loss": 0.8355, + "step": 303500 + }, + { + "epoch": 88.06, + "learning_rate": 2.7983777520278097e-05, + "loss": 0.8451, + "step": 304000 + }, + { + "epoch": 88.21, + "learning_rate": 2.7947566628041717e-05, + "loss": 0.787, + "step": 304500 + }, + { + "epoch": 88.35, + "learning_rate": 2.7911355735805333e-05, + "loss": 0.8058, + "step": 305000 + }, + { + "epoch": 88.5, + "learning_rate": 2.7875144843568946e-05, + "loss": 0.802, + "step": 305500 + }, + { + "epoch": 88.64, + "learning_rate": 2.783893395133256e-05, + "loss": 0.8519, + "step": 306000 + }, + { + "epoch": 88.79, + "learning_rate": 2.7802723059096174e-05, + "loss": 0.7892, + "step": 306500 + }, + { + "epoch": 88.93, + "learning_rate": 2.7766512166859794e-05, + "loss": 0.7878, + "step": 307000 + }, + { + "epoch": 89.08, + "learning_rate": 2.773030127462341e-05, + "loss": 0.7799, + "step": 307500 + }, + { + "epoch": 89.22, + "learning_rate": 2.7694090382387023e-05, + "loss": 0.7811, + "step": 308000 + }, + { + "epoch": 89.37, + "learning_rate": 2.765787949015064e-05, + "loss": 0.7494, + "step": 308500 + }, + { + "epoch": 89.51, + "learning_rate": 2.762166859791425e-05, + "loss": 0.7852, + "step": 309000 + }, + { + "epoch": 89.66, + "learning_rate": 2.758545770567787e-05, + "loss": 0.8175, + "step": 309500 + }, + { + "epoch": 89.8, + "learning_rate": 2.7549246813441487e-05, + "loss": 0.7376, + "step": 310000 + }, + { + "epoch": 89.95, + "learning_rate": 2.75130359212051e-05, + "loss": 0.7756, + "step": 310500 + }, + { + "epoch": 90.09, + "learning_rate": 2.7476825028968716e-05, + "loss": 0.7502, + "step": 311000 + }, + { + "epoch": 90.24, + "learning_rate": 2.744061413673233e-05, + "loss": 0.715, + "step": 311500 + }, + { + "epoch": 90.38, + "learning_rate": 2.7404403244495948e-05, + "loss": 0.7592, + "step": 312000 + }, + { + "epoch": 90.53, + "learning_rate": 2.7368192352259557e-05, + "loss": 0.756, + "step": 312500 + }, + { + "epoch": 90.67, + "learning_rate": 2.7331981460023177e-05, + "loss": 0.8058, + "step": 313000 + }, + { + "epoch": 90.82, + "learning_rate": 2.7295770567786793e-05, + "loss": 0.7458, + "step": 313500 + }, + { + "epoch": 90.96, + "learning_rate": 2.7259559675550406e-05, + "loss": 0.7984, + "step": 314000 + }, + { + "epoch": 91.11, + "learning_rate": 2.7223348783314022e-05, + "loss": 0.7359, + "step": 314500 + }, + { + "epoch": 91.25, + "learning_rate": 2.7187137891077634e-05, + "loss": 0.7224, + "step": 315000 + }, + { + "epoch": 91.4, + "learning_rate": 2.7150926998841254e-05, + "loss": 0.7386, + "step": 315500 + }, + { + "epoch": 91.54, + "learning_rate": 2.711471610660487e-05, + "loss": 0.7468, + "step": 316000 + }, + { + "epoch": 91.69, + "learning_rate": 2.7078505214368483e-05, + "loss": 0.7356, + "step": 316500 + }, + { + "epoch": 91.83, + "learning_rate": 2.70422943221321e-05, + "loss": 0.7545, + "step": 317000 + }, + { + "epoch": 91.98, + "learning_rate": 2.700608342989571e-05, + "loss": 0.7898, + "step": 317500 + }, + { + "epoch": 92.12, + "learning_rate": 2.696987253765933e-05, + "loss": 0.7097, + "step": 318000 + }, + { + "epoch": 92.27, + "learning_rate": 2.693366164542294e-05, + "loss": 0.7192, + "step": 318500 + }, + { + "epoch": 92.41, + "learning_rate": 2.689745075318656e-05, + "loss": 0.7318, + "step": 319000 + }, + { + "epoch": 92.56, + "learning_rate": 2.6861239860950176e-05, + "loss": 0.6905, + "step": 319500 + }, + { + "epoch": 92.7, + "learning_rate": 2.682502896871379e-05, + "loss": 0.7404, + "step": 320000 + }, + { + "epoch": 92.84, + "learning_rate": 2.6788818076477408e-05, + "loss": 0.7495, + "step": 320500 + }, + { + "epoch": 92.99, + "learning_rate": 2.6752607184241017e-05, + "loss": 0.7459, + "step": 321000 + }, + { + "epoch": 93.13, + "learning_rate": 2.6716396292004637e-05, + "loss": 0.7149, + "step": 321500 + }, + { + "epoch": 93.28, + "learning_rate": 2.6680185399768253e-05, + "loss": 0.6852, + "step": 322000 + }, + { + "epoch": 93.42, + "learning_rate": 2.6643974507531866e-05, + "loss": 0.6797, + "step": 322500 + }, + { + "epoch": 93.57, + "learning_rate": 2.6607763615295482e-05, + "loss": 0.6807, + "step": 323000 + }, + { + "epoch": 93.71, + "learning_rate": 2.6571552723059094e-05, + "loss": 0.7131, + "step": 323500 + }, + { + "epoch": 93.86, + "learning_rate": 2.6535341830822714e-05, + "loss": 0.7032, + "step": 324000 + }, + { + "epoch": 94.0, + "learning_rate": 2.649913093858633e-05, + "loss": 0.7271, + "step": 324500 + }, + { + "epoch": 94.15, + "learning_rate": 2.6462920046349943e-05, + "loss": 0.6575, + "step": 325000 + }, + { + "epoch": 94.29, + "learning_rate": 2.642670915411356e-05, + "loss": 0.7185, + "step": 325500 + }, + { + "epoch": 94.44, + "learning_rate": 2.639049826187717e-05, + "loss": 0.7487, + "step": 326000 + }, + { + "epoch": 94.58, + "learning_rate": 2.635428736964079e-05, + "loss": 0.7181, + "step": 326500 + }, + { + "epoch": 94.73, + "learning_rate": 2.63180764774044e-05, + "loss": 0.6834, + "step": 327000 + }, + { + "epoch": 94.87, + "learning_rate": 2.628186558516802e-05, + "loss": 0.7236, + "step": 327500 + }, + { + "epoch": 95.02, + "learning_rate": 2.6245654692931636e-05, + "loss": 0.6998, + "step": 328000 + }, + { + "epoch": 95.16, + "learning_rate": 2.620944380069525e-05, + "loss": 0.6912, + "step": 328500 + }, + { + "epoch": 95.31, + "learning_rate": 2.6173232908458868e-05, + "loss": 0.6545, + "step": 329000 + }, + { + "epoch": 95.45, + "learning_rate": 2.6137022016222477e-05, + "loss": 0.6852, + "step": 329500 + }, + { + "epoch": 95.6, + "learning_rate": 2.6100811123986097e-05, + "loss": 0.6328, + "step": 330000 + }, + { + "epoch": 95.74, + "learning_rate": 2.6064600231749713e-05, + "loss": 0.6718, + "step": 330500 + }, + { + "epoch": 95.89, + "learning_rate": 2.6028389339513326e-05, + "loss": 0.6903, + "step": 331000 + }, + { + "epoch": 96.03, + "learning_rate": 2.5992178447276945e-05, + "loss": 0.663, + "step": 331500 + }, + { + "epoch": 96.18, + "learning_rate": 2.5955967555040555e-05, + "loss": 0.6634, + "step": 332000 + }, + { + "epoch": 96.32, + "learning_rate": 2.5919756662804174e-05, + "loss": 0.6536, + "step": 332500 + }, + { + "epoch": 96.47, + "learning_rate": 2.5883545770567787e-05, + "loss": 0.6705, + "step": 333000 + }, + { + "epoch": 96.61, + "learning_rate": 2.5847334878331403e-05, + "loss": 0.6472, + "step": 333500 + }, + { + "epoch": 96.76, + "learning_rate": 2.581112398609502e-05, + "loss": 0.6946, + "step": 334000 + }, + { + "epoch": 96.9, + "learning_rate": 2.577491309385863e-05, + "loss": 0.6203, + "step": 334500 + }, + { + "epoch": 97.05, + "learning_rate": 2.573870220162225e-05, + "loss": 0.6359, + "step": 335000 + }, + { + "epoch": 97.19, + "learning_rate": 2.570249130938586e-05, + "loss": 0.6363, + "step": 335500 + }, + { + "epoch": 97.33, + "learning_rate": 2.566628041714948e-05, + "loss": 0.6343, + "step": 336000 + }, + { + "epoch": 97.48, + "learning_rate": 2.5630069524913096e-05, + "loss": 0.6251, + "step": 336500 + }, + { + "epoch": 97.62, + "learning_rate": 2.559385863267671e-05, + "loss": 0.6324, + "step": 337000 + }, + { + "epoch": 97.77, + "learning_rate": 2.5557647740440328e-05, + "loss": 0.6567, + "step": 337500 + }, + { + "epoch": 97.91, + "learning_rate": 2.5521436848203938e-05, + "loss": 0.6568, + "step": 338000 + }, + { + "epoch": 98.06, + "learning_rate": 2.5485225955967557e-05, + "loss": 0.6158, + "step": 338500 + }, + { + "epoch": 98.2, + "learning_rate": 2.5449015063731173e-05, + "loss": 0.5807, + "step": 339000 + }, + { + "epoch": 98.35, + "learning_rate": 2.5412804171494786e-05, + "loss": 0.6511, + "step": 339500 + }, + { + "epoch": 98.49, + "learning_rate": 2.5376593279258405e-05, + "loss": 0.6278, + "step": 340000 + }, + { + "epoch": 98.64, + "learning_rate": 2.5340382387022015e-05, + "loss": 0.6598, + "step": 340500 + }, + { + "epoch": 98.78, + "learning_rate": 2.5304171494785634e-05, + "loss": 0.6021, + "step": 341000 + }, + { + "epoch": 98.93, + "learning_rate": 2.5267960602549247e-05, + "loss": 0.6365, + "step": 341500 + }, + { + "epoch": 99.07, + "learning_rate": 2.5231749710312863e-05, + "loss": 0.6539, + "step": 342000 + }, + { + "epoch": 99.22, + "learning_rate": 2.519553881807648e-05, + "loss": 0.5917, + "step": 342500 + }, + { + "epoch": 99.36, + "learning_rate": 2.5159327925840092e-05, + "loss": 0.6234, + "step": 343000 + }, + { + "epoch": 99.51, + "learning_rate": 2.512311703360371e-05, + "loss": 0.6112, + "step": 343500 + }, + { + "epoch": 99.65, + "learning_rate": 2.5086906141367324e-05, + "loss": 0.6155, + "step": 344000 + }, + { + "epoch": 99.8, + "learning_rate": 2.505069524913094e-05, + "loss": 0.5829, + "step": 344500 + }, + { + "epoch": 99.94, + "learning_rate": 2.5014484356894556e-05, + "loss": 0.6073, + "step": 345000 + }, + { + "epoch": 100.09, + "learning_rate": 2.497827346465817e-05, + "loss": 0.5886, + "step": 345500 + }, + { + "epoch": 100.23, + "learning_rate": 2.4942062572421785e-05, + "loss": 0.5803, + "step": 346000 + }, + { + "epoch": 100.38, + "learning_rate": 2.49058516801854e-05, + "loss": 0.5704, + "step": 346500 + }, + { + "epoch": 100.52, + "learning_rate": 2.4869640787949017e-05, + "loss": 0.5902, + "step": 347000 + }, + { + "epoch": 100.67, + "learning_rate": 2.4833429895712633e-05, + "loss": 0.5799, + "step": 347500 + }, + { + "epoch": 100.81, + "learning_rate": 2.4797219003476246e-05, + "loss": 0.5898, + "step": 348000 + }, + { + "epoch": 100.96, + "learning_rate": 2.4761008111239862e-05, + "loss": 0.6129, + "step": 348500 + }, + { + "epoch": 101.1, + "learning_rate": 2.4724797219003478e-05, + "loss": 0.6226, + "step": 349000 + }, + { + "epoch": 101.25, + "learning_rate": 2.4688586326767094e-05, + "loss": 0.6064, + "step": 349500 + }, + { + "epoch": 101.39, + "learning_rate": 2.4652375434530707e-05, + "loss": 0.5727, + "step": 350000 + }, + { + "epoch": 101.54, + "learning_rate": 2.4616164542294323e-05, + "loss": 0.5478, + "step": 350500 + }, + { + "epoch": 101.68, + "learning_rate": 2.457995365005794e-05, + "loss": 0.5586, + "step": 351000 + }, + { + "epoch": 101.83, + "learning_rate": 2.4543742757821552e-05, + "loss": 0.5872, + "step": 351500 + }, + { + "epoch": 101.97, + "learning_rate": 2.450753186558517e-05, + "loss": 0.6057, + "step": 352000 + }, + { + "epoch": 102.11, + "learning_rate": 2.4471320973348784e-05, + "loss": 0.5403, + "step": 352500 + }, + { + "epoch": 102.26, + "learning_rate": 2.44351100811124e-05, + "loss": 0.5902, + "step": 353000 + }, + { + "epoch": 102.4, + "learning_rate": 2.4398899188876016e-05, + "loss": 0.5792, + "step": 353500 + }, + { + "epoch": 102.55, + "learning_rate": 2.436268829663963e-05, + "loss": 0.6074, + "step": 354000 + }, + { + "epoch": 102.69, + "learning_rate": 2.4326477404403245e-05, + "loss": 0.5505, + "step": 354500 + }, + { + "epoch": 102.84, + "learning_rate": 2.429026651216686e-05, + "loss": 0.6083, + "step": 355000 + }, + { + "epoch": 102.98, + "learning_rate": 2.4254055619930477e-05, + "loss": 0.5578, + "step": 355500 + }, + { + "epoch": 103.13, + "learning_rate": 2.4217844727694093e-05, + "loss": 0.5082, + "step": 356000 + }, + { + "epoch": 103.27, + "learning_rate": 2.4181633835457706e-05, + "loss": 0.5695, + "step": 356500 + }, + { + "epoch": 103.42, + "learning_rate": 2.4145422943221322e-05, + "loss": 0.5668, + "step": 357000 + }, + { + "epoch": 103.56, + "learning_rate": 2.4109212050984935e-05, + "loss": 0.5363, + "step": 357500 + }, + { + "epoch": 103.71, + "learning_rate": 2.4073001158748554e-05, + "loss": 0.5375, + "step": 358000 + }, + { + "epoch": 103.85, + "learning_rate": 2.4036790266512167e-05, + "loss": 0.5513, + "step": 358500 + }, + { + "epoch": 104.0, + "learning_rate": 2.4000579374275783e-05, + "loss": 0.5869, + "step": 359000 + }, + { + "epoch": 104.14, + "learning_rate": 2.39643684820394e-05, + "loss": 0.55, + "step": 359500 + }, + { + "epoch": 104.29, + "learning_rate": 2.3928157589803012e-05, + "loss": 0.5484, + "step": 360000 + }, + { + "epoch": 104.43, + "learning_rate": 2.3891946697566628e-05, + "loss": 0.5442, + "step": 360500 + }, + { + "epoch": 104.58, + "learning_rate": 2.3855735805330244e-05, + "loss": 0.5365, + "step": 361000 + }, + { + "epoch": 104.72, + "learning_rate": 2.381952491309386e-05, + "loss": 0.5659, + "step": 361500 + }, + { + "epoch": 104.87, + "learning_rate": 2.3783314020857476e-05, + "loss": 0.5471, + "step": 362000 + }, + { + "epoch": 105.01, + "learning_rate": 2.374710312862109e-05, + "loss": 0.5269, + "step": 362500 + }, + { + "epoch": 105.16, + "learning_rate": 2.3710892236384705e-05, + "loss": 0.5456, + "step": 363000 + }, + { + "epoch": 105.3, + "learning_rate": 2.367468134414832e-05, + "loss": 0.526, + "step": 363500 + }, + { + "epoch": 105.45, + "learning_rate": 2.3638470451911937e-05, + "loss": 0.5215, + "step": 364000 + }, + { + "epoch": 105.59, + "learning_rate": 2.3602259559675553e-05, + "loss": 0.5163, + "step": 364500 + }, + { + "epoch": 105.74, + "learning_rate": 2.3566048667439166e-05, + "loss": 0.5441, + "step": 365000 + }, + { + "epoch": 105.88, + "learning_rate": 2.3529837775202782e-05, + "loss": 0.5098, + "step": 365500 + }, + { + "epoch": 106.03, + "learning_rate": 2.3493626882966395e-05, + "loss": 0.5359, + "step": 366000 + }, + { + "epoch": 106.17, + "learning_rate": 2.3457415990730014e-05, + "loss": 0.5015, + "step": 366500 + }, + { + "epoch": 106.32, + "learning_rate": 2.342120509849363e-05, + "loss": 0.4858, + "step": 367000 + }, + { + "epoch": 106.46, + "learning_rate": 2.3384994206257243e-05, + "loss": 0.5311, + "step": 367500 + }, + { + "epoch": 106.6, + "learning_rate": 2.334878331402086e-05, + "loss": 0.5061, + "step": 368000 + }, + { + "epoch": 106.75, + "learning_rate": 2.3312572421784472e-05, + "loss": 0.5404, + "step": 368500 + }, + { + "epoch": 106.89, + "learning_rate": 2.3276361529548088e-05, + "loss": 0.5108, + "step": 369000 + }, + { + "epoch": 107.04, + "learning_rate": 2.3240150637311704e-05, + "loss": 0.4969, + "step": 369500 + }, + { + "epoch": 107.18, + "learning_rate": 2.320393974507532e-05, + "loss": 0.4969, + "step": 370000 + }, + { + "epoch": 107.33, + "learning_rate": 2.3167728852838936e-05, + "loss": 0.5172, + "step": 370500 + }, + { + "epoch": 107.47, + "learning_rate": 2.313151796060255e-05, + "loss": 0.5653, + "step": 371000 + }, + { + "epoch": 107.62, + "learning_rate": 2.3095307068366165e-05, + "loss": 0.517, + "step": 371500 + }, + { + "epoch": 107.76, + "learning_rate": 2.305909617612978e-05, + "loss": 0.496, + "step": 372000 + }, + { + "epoch": 107.91, + "learning_rate": 2.3022885283893397e-05, + "loss": 0.5357, + "step": 372500 + }, + { + "epoch": 108.05, + "learning_rate": 2.2986674391657013e-05, + "loss": 0.5115, + "step": 373000 + }, + { + "epoch": 108.2, + "learning_rate": 2.2950463499420626e-05, + "loss": 0.4683, + "step": 373500 + }, + { + "epoch": 108.34, + "learning_rate": 2.2914252607184242e-05, + "loss": 0.5017, + "step": 374000 + }, + { + "epoch": 108.49, + "learning_rate": 2.2878041714947855e-05, + "loss": 0.479, + "step": 374500 + }, + { + "epoch": 108.63, + "learning_rate": 2.284183082271147e-05, + "loss": 0.4886, + "step": 375000 + }, + { + "epoch": 108.78, + "learning_rate": 2.280561993047509e-05, + "loss": 0.4825, + "step": 375500 + }, + { + "epoch": 108.92, + "learning_rate": 2.2769409038238703e-05, + "loss": 0.4878, + "step": 376000 + }, + { + "epoch": 109.07, + "learning_rate": 2.273319814600232e-05, + "loss": 0.4942, + "step": 376500 + }, + { + "epoch": 109.21, + "learning_rate": 2.2696987253765932e-05, + "loss": 0.5016, + "step": 377000 + }, + { + "epoch": 109.36, + "learning_rate": 2.2660776361529548e-05, + "loss": 0.4747, + "step": 377500 + }, + { + "epoch": 109.5, + "learning_rate": 2.2624565469293164e-05, + "loss": 0.4661, + "step": 378000 + }, + { + "epoch": 109.65, + "learning_rate": 2.258835457705678e-05, + "loss": 0.4792, + "step": 378500 + }, + { + "epoch": 109.79, + "learning_rate": 2.2552143684820396e-05, + "loss": 0.5083, + "step": 379000 + }, + { + "epoch": 109.94, + "learning_rate": 2.251593279258401e-05, + "loss": 0.5082, + "step": 379500 + }, + { + "epoch": 110.08, + "learning_rate": 2.2479721900347625e-05, + "loss": 0.4781, + "step": 380000 + }, + { + "epoch": 110.23, + "learning_rate": 2.244351100811124e-05, + "loss": 0.4532, + "step": 380500 + }, + { + "epoch": 110.37, + "learning_rate": 2.2407300115874857e-05, + "loss": 0.4799, + "step": 381000 + }, + { + "epoch": 110.52, + "learning_rate": 2.2371089223638473e-05, + "loss": 0.47, + "step": 381500 + }, + { + "epoch": 110.66, + "learning_rate": 2.2334878331402086e-05, + "loss": 0.4906, + "step": 382000 + }, + { + "epoch": 110.81, + "learning_rate": 2.2298667439165702e-05, + "loss": 0.5021, + "step": 382500 + }, + { + "epoch": 110.95, + "learning_rate": 2.226245654692932e-05, + "loss": 0.505, + "step": 383000 + }, + { + "epoch": 111.1, + "learning_rate": 2.222624565469293e-05, + "loss": 0.4447, + "step": 383500 + }, + { + "epoch": 111.24, + "learning_rate": 2.219003476245655e-05, + "loss": 0.4363, + "step": 384000 + }, + { + "epoch": 111.38, + "learning_rate": 2.2153823870220163e-05, + "loss": 0.4352, + "step": 384500 + }, + { + "epoch": 111.53, + "learning_rate": 2.211761297798378e-05, + "loss": 0.4598, + "step": 385000 + }, + { + "epoch": 111.67, + "learning_rate": 2.2081402085747392e-05, + "loss": 0.4937, + "step": 385500 + }, + { + "epoch": 111.82, + "learning_rate": 2.2045191193511008e-05, + "loss": 0.5125, + "step": 386000 + }, + { + "epoch": 111.96, + "learning_rate": 2.2008980301274624e-05, + "loss": 0.446, + "step": 386500 + }, + { + "epoch": 112.11, + "learning_rate": 2.197276940903824e-05, + "loss": 0.4852, + "step": 387000 + }, + { + "epoch": 112.25, + "learning_rate": 2.1936558516801856e-05, + "loss": 0.4505, + "step": 387500 + }, + { + "epoch": 112.4, + "learning_rate": 2.190034762456547e-05, + "loss": 0.4709, + "step": 388000 + }, + { + "epoch": 112.54, + "learning_rate": 2.1864136732329085e-05, + "loss": 0.4521, + "step": 388500 + }, + { + "epoch": 112.69, + "learning_rate": 2.18279258400927e-05, + "loss": 0.4275, + "step": 389000 + }, + { + "epoch": 112.83, + "learning_rate": 2.1791714947856314e-05, + "loss": 0.4625, + "step": 389500 + }, + { + "epoch": 112.98, + "learning_rate": 2.1755504055619934e-05, + "loss": 0.4842, + "step": 390000 + }, + { + "epoch": 113.12, + "learning_rate": 2.1719293163383546e-05, + "loss": 0.4591, + "step": 390500 + }, + { + "epoch": 113.27, + "learning_rate": 2.1683082271147162e-05, + "loss": 0.4878, + "step": 391000 + }, + { + "epoch": 113.41, + "learning_rate": 2.164687137891078e-05, + "loss": 0.4352, + "step": 391500 + }, + { + "epoch": 113.56, + "learning_rate": 2.161066048667439e-05, + "loss": 0.4387, + "step": 392000 + }, + { + "epoch": 113.7, + "learning_rate": 2.157444959443801e-05, + "loss": 0.4172, + "step": 392500 + }, + { + "epoch": 113.85, + "learning_rate": 2.1538238702201623e-05, + "loss": 0.4304, + "step": 393000 + }, + { + "epoch": 113.99, + "learning_rate": 2.150202780996524e-05, + "loss": 0.4296, + "step": 393500 + }, + { + "epoch": 114.14, + "learning_rate": 2.1465816917728852e-05, + "loss": 0.4355, + "step": 394000 + }, + { + "epoch": 114.28, + "learning_rate": 2.1429606025492468e-05, + "loss": 0.4185, + "step": 394500 + }, + { + "epoch": 114.43, + "learning_rate": 2.1393395133256084e-05, + "loss": 0.4318, + "step": 395000 + }, + { + "epoch": 114.57, + "learning_rate": 2.13571842410197e-05, + "loss": 0.4081, + "step": 395500 + }, + { + "epoch": 114.72, + "learning_rate": 2.1320973348783317e-05, + "loss": 0.4273, + "step": 396000 + }, + { + "epoch": 114.86, + "learning_rate": 2.128476245654693e-05, + "loss": 0.4367, + "step": 396500 + }, + { + "epoch": 115.01, + "learning_rate": 2.1248551564310545e-05, + "loss": 0.4666, + "step": 397000 + }, + { + "epoch": 115.15, + "learning_rate": 2.121234067207416e-05, + "loss": 0.4519, + "step": 397500 + }, + { + "epoch": 115.3, + "learning_rate": 2.1176129779837774e-05, + "loss": 0.4253, + "step": 398000 + }, + { + "epoch": 115.44, + "learning_rate": 2.1139918887601394e-05, + "loss": 0.4376, + "step": 398500 + }, + { + "epoch": 115.59, + "learning_rate": 2.1103707995365006e-05, + "loss": 0.4602, + "step": 399000 + }, + { + "epoch": 115.73, + "learning_rate": 2.1067497103128622e-05, + "loss": 0.4096, + "step": 399500 + }, + { + "epoch": 115.87, + "learning_rate": 2.103128621089224e-05, + "loss": 0.4173, + "step": 400000 + }, + { + "epoch": 116.02, + "learning_rate": 2.099507531865585e-05, + "loss": 0.4236, + "step": 400500 + }, + { + "epoch": 116.16, + "learning_rate": 2.0958864426419467e-05, + "loss": 0.3931, + "step": 401000 + }, + { + "epoch": 116.31, + "learning_rate": 2.0922653534183083e-05, + "loss": 0.4301, + "step": 401500 + }, + { + "epoch": 116.45, + "learning_rate": 2.08864426419467e-05, + "loss": 0.4355, + "step": 402000 + }, + { + "epoch": 116.6, + "learning_rate": 2.0850231749710312e-05, + "loss": 0.4015, + "step": 402500 + }, + { + "epoch": 116.74, + "learning_rate": 2.081402085747393e-05, + "loss": 0.3858, + "step": 403000 + }, + { + "epoch": 116.89, + "learning_rate": 2.0777809965237544e-05, + "loss": 0.4162, + "step": 403500 + }, + { + "epoch": 117.03, + "learning_rate": 2.074159907300116e-05, + "loss": 0.4381, + "step": 404000 + }, + { + "epoch": 117.18, + "learning_rate": 2.0705388180764777e-05, + "loss": 0.4383, + "step": 404500 + }, + { + "epoch": 117.32, + "learning_rate": 2.066917728852839e-05, + "loss": 0.3823, + "step": 405000 + }, + { + "epoch": 117.47, + "learning_rate": 2.0632966396292005e-05, + "loss": 0.4065, + "step": 405500 + }, + { + "epoch": 117.61, + "learning_rate": 2.059675550405562e-05, + "loss": 0.4262, + "step": 406000 + }, + { + "epoch": 117.76, + "learning_rate": 2.0560544611819234e-05, + "loss": 0.4056, + "step": 406500 + }, + { + "epoch": 117.9, + "learning_rate": 2.0524333719582854e-05, + "loss": 0.4442, + "step": 407000 + }, + { + "epoch": 118.05, + "learning_rate": 2.0488122827346466e-05, + "loss": 0.453, + "step": 407500 + }, + { + "epoch": 118.19, + "learning_rate": 2.0451911935110083e-05, + "loss": 0.4199, + "step": 408000 + }, + { + "epoch": 118.34, + "learning_rate": 2.04157010428737e-05, + "loss": 0.3881, + "step": 408500 + }, + { + "epoch": 118.48, + "learning_rate": 2.037949015063731e-05, + "loss": 0.4093, + "step": 409000 + }, + { + "epoch": 118.63, + "learning_rate": 2.0343279258400927e-05, + "loss": 0.3842, + "step": 409500 + }, + { + "epoch": 118.77, + "learning_rate": 2.0307068366164544e-05, + "loss": 0.3937, + "step": 410000 + }, + { + "epoch": 118.92, + "learning_rate": 2.027085747392816e-05, + "loss": 0.4245, + "step": 410500 + }, + { + "epoch": 119.06, + "learning_rate": 2.0234646581691776e-05, + "loss": 0.3956, + "step": 411000 + }, + { + "epoch": 119.21, + "learning_rate": 2.019843568945539e-05, + "loss": 0.3782, + "step": 411500 + }, + { + "epoch": 119.35, + "learning_rate": 2.0162224797219005e-05, + "loss": 0.3795, + "step": 412000 + }, + { + "epoch": 119.5, + "learning_rate": 2.0126013904982617e-05, + "loss": 0.3825, + "step": 412500 + }, + { + "epoch": 119.64, + "learning_rate": 2.0089803012746237e-05, + "loss": 0.3702, + "step": 413000 + }, + { + "epoch": 119.79, + "learning_rate": 2.005359212050985e-05, + "loss": 0.3629, + "step": 413500 + }, + { + "epoch": 119.93, + "learning_rate": 2.0017381228273466e-05, + "loss": 0.4095, + "step": 414000 + }, + { + "epoch": 120.08, + "learning_rate": 1.998117033603708e-05, + "loss": 0.3685, + "step": 414500 + }, + { + "epoch": 120.22, + "learning_rate": 1.9944959443800694e-05, + "loss": 0.4241, + "step": 415000 + }, + { + "epoch": 120.37, + "learning_rate": 1.990874855156431e-05, + "loss": 0.3681, + "step": 415500 + }, + { + "epoch": 120.51, + "learning_rate": 1.9872537659327926e-05, + "loss": 0.3791, + "step": 416000 + }, + { + "epoch": 120.65, + "learning_rate": 1.9836326767091543e-05, + "loss": 0.3634, + "step": 416500 + }, + { + "epoch": 120.8, + "learning_rate": 1.980011587485516e-05, + "loss": 0.4004, + "step": 417000 + }, + { + "epoch": 120.94, + "learning_rate": 1.976390498261877e-05, + "loss": 0.3745, + "step": 417500 + }, + { + "epoch": 121.09, + "learning_rate": 1.9727694090382387e-05, + "loss": 0.3886, + "step": 418000 + }, + { + "epoch": 121.23, + "learning_rate": 1.9691483198146004e-05, + "loss": 0.3633, + "step": 418500 + }, + { + "epoch": 121.38, + "learning_rate": 1.965527230590962e-05, + "loss": 0.3649, + "step": 419000 + }, + { + "epoch": 121.52, + "learning_rate": 1.9619061413673236e-05, + "loss": 0.3719, + "step": 419500 + }, + { + "epoch": 121.67, + "learning_rate": 1.958285052143685e-05, + "loss": 0.4032, + "step": 420000 + }, + { + "epoch": 121.81, + "learning_rate": 1.9546639629200465e-05, + "loss": 0.378, + "step": 420500 + }, + { + "epoch": 121.96, + "learning_rate": 1.9510428736964077e-05, + "loss": 0.3737, + "step": 421000 + }, + { + "epoch": 122.1, + "learning_rate": 1.9474217844727697e-05, + "loss": 0.3599, + "step": 421500 + }, + { + "epoch": 122.25, + "learning_rate": 1.943800695249131e-05, + "loss": 0.3806, + "step": 422000 + }, + { + "epoch": 122.39, + "learning_rate": 1.9401796060254926e-05, + "loss": 0.3456, + "step": 422500 + }, + { + "epoch": 122.54, + "learning_rate": 1.936558516801854e-05, + "loss": 0.3443, + "step": 423000 + }, + { + "epoch": 122.68, + "learning_rate": 1.9329374275782154e-05, + "loss": 0.3855, + "step": 423500 + }, + { + "epoch": 122.83, + "learning_rate": 1.929316338354577e-05, + "loss": 0.3958, + "step": 424000 + }, + { + "epoch": 122.97, + "learning_rate": 1.9256952491309387e-05, + "loss": 0.3696, + "step": 424500 + }, + { + "epoch": 123.12, + "learning_rate": 1.9220741599073003e-05, + "loss": 0.3616, + "step": 425000 + }, + { + "epoch": 123.26, + "learning_rate": 1.918453070683662e-05, + "loss": 0.356, + "step": 425500 + }, + { + "epoch": 123.41, + "learning_rate": 1.914831981460023e-05, + "loss": 0.4097, + "step": 426000 + }, + { + "epoch": 123.55, + "learning_rate": 1.9112108922363848e-05, + "loss": 0.3496, + "step": 426500 + }, + { + "epoch": 123.7, + "learning_rate": 1.9075898030127464e-05, + "loss": 0.3326, + "step": 427000 + }, + { + "epoch": 123.84, + "learning_rate": 1.903968713789108e-05, + "loss": 0.3807, + "step": 427500 + }, + { + "epoch": 123.99, + "learning_rate": 1.9003476245654696e-05, + "loss": 0.4034, + "step": 428000 + }, + { + "epoch": 124.13, + "learning_rate": 1.896726535341831e-05, + "loss": 0.3497, + "step": 428500 + }, + { + "epoch": 124.28, + "learning_rate": 1.8931054461181925e-05, + "loss": 0.3475, + "step": 429000 + }, + { + "epoch": 124.42, + "learning_rate": 1.8894843568945537e-05, + "loss": 0.3313, + "step": 429500 + }, + { + "epoch": 124.57, + "learning_rate": 1.8858632676709153e-05, + "loss": 0.369, + "step": 430000 + }, + { + "epoch": 124.71, + "learning_rate": 1.8822421784472773e-05, + "loss": 0.3487, + "step": 430500 + }, + { + "epoch": 124.86, + "learning_rate": 1.8786210892236386e-05, + "loss": 0.329, + "step": 431000 + }, + { + "epoch": 125.0, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.3298, + "step": 431500 + }, + { + "epoch": 125.14, + "learning_rate": 1.8713789107763614e-05, + "loss": 0.3246, + "step": 432000 + }, + { + "epoch": 125.29, + "learning_rate": 1.867757821552723e-05, + "loss": 0.3272, + "step": 432500 + }, + { + "epoch": 125.43, + "learning_rate": 1.8641367323290847e-05, + "loss": 0.3454, + "step": 433000 + }, + { + "epoch": 125.58, + "learning_rate": 1.8605156431054463e-05, + "loss": 0.3547, + "step": 433500 + }, + { + "epoch": 125.72, + "learning_rate": 1.856894553881808e-05, + "loss": 0.3334, + "step": 434000 + }, + { + "epoch": 125.87, + "learning_rate": 1.853273464658169e-05, + "loss": 0.3776, + "step": 434500 + }, + { + "epoch": 126.01, + "learning_rate": 1.8496523754345308e-05, + "loss": 0.3607, + "step": 435000 + }, + { + "epoch": 126.16, + "learning_rate": 1.8460312862108924e-05, + "loss": 0.329, + "step": 435500 + }, + { + "epoch": 126.3, + "learning_rate": 1.842410196987254e-05, + "loss": 0.33, + "step": 436000 + }, + { + "epoch": 126.45, + "learning_rate": 1.8387891077636156e-05, + "loss": 0.3466, + "step": 436500 + }, + { + "epoch": 126.59, + "learning_rate": 1.835168018539977e-05, + "loss": 0.3597, + "step": 437000 + }, + { + "epoch": 126.74, + "learning_rate": 1.8315469293163385e-05, + "loss": 0.3248, + "step": 437500 + }, + { + "epoch": 126.88, + "learning_rate": 1.8279258400926997e-05, + "loss": 0.3396, + "step": 438000 + }, + { + "epoch": 127.03, + "learning_rate": 1.8243047508690614e-05, + "loss": 0.3413, + "step": 438500 + }, + { + "epoch": 127.17, + "learning_rate": 1.8206836616454233e-05, + "loss": 0.3249, + "step": 439000 + }, + { + "epoch": 127.32, + "learning_rate": 1.8170625724217846e-05, + "loss": 0.3022, + "step": 439500 + }, + { + "epoch": 127.46, + "learning_rate": 1.8134414831981462e-05, + "loss": 0.308, + "step": 440000 + }, + { + "epoch": 127.61, + "learning_rate": 1.8098203939745075e-05, + "loss": 0.3446, + "step": 440500 + }, + { + "epoch": 127.75, + "learning_rate": 1.806199304750869e-05, + "loss": 0.3335, + "step": 441000 + }, + { + "epoch": 127.9, + "learning_rate": 1.8025782155272307e-05, + "loss": 0.335, + "step": 441500 + }, + { + "epoch": 128.04, + "learning_rate": 1.7989571263035923e-05, + "loss": 0.3059, + "step": 442000 + }, + { + "epoch": 128.19, + "learning_rate": 1.795336037079954e-05, + "loss": 0.3314, + "step": 442500 + }, + { + "epoch": 128.33, + "learning_rate": 1.791714947856315e-05, + "loss": 0.3004, + "step": 443000 + }, + { + "epoch": 128.48, + "learning_rate": 1.7880938586326768e-05, + "loss": 0.3249, + "step": 443500 + }, + { + "epoch": 128.62, + "learning_rate": 1.7844727694090384e-05, + "loss": 0.3198, + "step": 444000 + }, + { + "epoch": 128.77, + "learning_rate": 1.7808516801853997e-05, + "loss": 0.3394, + "step": 444500 + }, + { + "epoch": 128.91, + "learning_rate": 1.7772305909617616e-05, + "loss": 0.3075, + "step": 445000 + }, + { + "epoch": 129.06, + "learning_rate": 1.773609501738123e-05, + "loss": 0.334, + "step": 445500 + }, + { + "epoch": 129.2, + "learning_rate": 1.7699884125144845e-05, + "loss": 0.306, + "step": 446000 + }, + { + "epoch": 129.35, + "learning_rate": 1.766367323290846e-05, + "loss": 0.3146, + "step": 446500 + }, + { + "epoch": 129.49, + "learning_rate": 1.7627462340672074e-05, + "loss": 0.3068, + "step": 447000 + }, + { + "epoch": 129.63, + "learning_rate": 1.7591251448435693e-05, + "loss": 0.3124, + "step": 447500 + }, + { + "epoch": 129.78, + "learning_rate": 1.7555040556199306e-05, + "loss": 0.3082, + "step": 448000 + }, + { + "epoch": 129.92, + "learning_rate": 1.7518829663962922e-05, + "loss": 0.3215, + "step": 448500 + }, + { + "epoch": 130.07, + "learning_rate": 1.7482618771726535e-05, + "loss": 0.3114, + "step": 449000 + }, + { + "epoch": 130.21, + "learning_rate": 1.744640787949015e-05, + "loss": 0.3228, + "step": 449500 + }, + { + "epoch": 130.36, + "learning_rate": 1.7410196987253767e-05, + "loss": 0.3281, + "step": 450000 + }, + { + "epoch": 130.5, + "learning_rate": 1.7373986095017383e-05, + "loss": 0.2865, + "step": 450500 + }, + { + "epoch": 130.65, + "learning_rate": 1.7337775202781e-05, + "loss": 0.3214, + "step": 451000 + }, + { + "epoch": 130.79, + "learning_rate": 1.730156431054461e-05, + "loss": 0.3105, + "step": 451500 + }, + { + "epoch": 130.94, + "learning_rate": 1.7265353418308228e-05, + "loss": 0.3095, + "step": 452000 + }, + { + "epoch": 131.08, + "learning_rate": 1.7229142526071844e-05, + "loss": 0.3034, + "step": 452500 + }, + { + "epoch": 131.23, + "learning_rate": 1.7192931633835457e-05, + "loss": 0.3057, + "step": 453000 + }, + { + "epoch": 131.37, + "learning_rate": 1.7156720741599076e-05, + "loss": 0.2957, + "step": 453500 + }, + { + "epoch": 131.52, + "learning_rate": 1.712050984936269e-05, + "loss": 0.312, + "step": 454000 + }, + { + "epoch": 131.66, + "learning_rate": 1.7084298957126305e-05, + "loss": 0.29, + "step": 454500 + }, + { + "epoch": 131.81, + "learning_rate": 1.704808806488992e-05, + "loss": 0.2816, + "step": 455000 + }, + { + "epoch": 131.95, + "learning_rate": 1.7011877172653534e-05, + "loss": 0.3185, + "step": 455500 + }, + { + "epoch": 132.1, + "learning_rate": 1.697566628041715e-05, + "loss": 0.298, + "step": 456000 + }, + { + "epoch": 132.24, + "learning_rate": 1.6939455388180766e-05, + "loss": 0.2794, + "step": 456500 + }, + { + "epoch": 132.39, + "learning_rate": 1.6903244495944382e-05, + "loss": 0.29, + "step": 457000 + }, + { + "epoch": 132.53, + "learning_rate": 1.6867033603707995e-05, + "loss": 0.3004, + "step": 457500 + }, + { + "epoch": 132.68, + "learning_rate": 1.683082271147161e-05, + "loss": 0.3047, + "step": 458000 + }, + { + "epoch": 132.82, + "learning_rate": 1.6794611819235227e-05, + "loss": 0.3057, + "step": 458500 + }, + { + "epoch": 132.97, + "learning_rate": 1.675840092699884e-05, + "loss": 0.2959, + "step": 459000 + }, + { + "epoch": 133.11, + "learning_rate": 1.672219003476246e-05, + "loss": 0.317, + "step": 459500 + }, + { + "epoch": 133.26, + "learning_rate": 1.6685979142526072e-05, + "loss": 0.3087, + "step": 460000 + }, + { + "epoch": 133.4, + "learning_rate": 1.6649768250289688e-05, + "loss": 0.2817, + "step": 460500 + }, + { + "epoch": 133.55, + "learning_rate": 1.6613557358053304e-05, + "loss": 0.2774, + "step": 461000 + }, + { + "epoch": 133.69, + "learning_rate": 1.6577346465816917e-05, + "loss": 0.2932, + "step": 461500 + }, + { + "epoch": 133.84, + "learning_rate": 1.6541135573580536e-05, + "loss": 0.2784, + "step": 462000 + }, + { + "epoch": 133.98, + "learning_rate": 1.650492468134415e-05, + "loss": 0.3047, + "step": 462500 + }, + { + "epoch": 134.13, + "learning_rate": 1.6468713789107765e-05, + "loss": 0.2939, + "step": 463000 + }, + { + "epoch": 134.27, + "learning_rate": 1.643250289687138e-05, + "loss": 0.2729, + "step": 463500 + }, + { + "epoch": 134.41, + "learning_rate": 1.6396292004634994e-05, + "loss": 0.2736, + "step": 464000 + }, + { + "epoch": 134.56, + "learning_rate": 1.636008111239861e-05, + "loss": 0.2993, + "step": 464500 + }, + { + "epoch": 134.7, + "learning_rate": 1.6323870220162226e-05, + "loss": 0.285, + "step": 465000 + }, + { + "epoch": 134.85, + "learning_rate": 1.6287659327925842e-05, + "loss": 0.2693, + "step": 465500 + }, + { + "epoch": 134.99, + "learning_rate": 1.6251448435689455e-05, + "loss": 0.2976, + "step": 466000 + }, + { + "epoch": 135.14, + "learning_rate": 1.621523754345307e-05, + "loss": 0.2804, + "step": 466500 + }, + { + "epoch": 135.28, + "learning_rate": 1.6179026651216687e-05, + "loss": 0.2843, + "step": 467000 + }, + { + "epoch": 135.43, + "learning_rate": 1.61428157589803e-05, + "loss": 0.2744, + "step": 467500 + }, + { + "epoch": 135.57, + "learning_rate": 1.610660486674392e-05, + "loss": 0.2793, + "step": 468000 + }, + { + "epoch": 135.72, + "learning_rate": 1.6070393974507532e-05, + "loss": 0.285, + "step": 468500 + }, + { + "epoch": 135.86, + "learning_rate": 1.6034183082271148e-05, + "loss": 0.2775, + "step": 469000 + }, + { + "epoch": 136.01, + "learning_rate": 1.5997972190034764e-05, + "loss": 0.2836, + "step": 469500 + }, + { + "epoch": 136.15, + "learning_rate": 1.5961761297798377e-05, + "loss": 0.2635, + "step": 470000 + }, + { + "epoch": 136.3, + "learning_rate": 1.5925550405561993e-05, + "loss": 0.2745, + "step": 470500 + }, + { + "epoch": 136.44, + "learning_rate": 1.588933951332561e-05, + "loss": 0.264, + "step": 471000 + }, + { + "epoch": 136.59, + "learning_rate": 1.5853128621089225e-05, + "loss": 0.2761, + "step": 471500 + }, + { + "epoch": 136.73, + "learning_rate": 1.581691772885284e-05, + "loss": 0.2966, + "step": 472000 + }, + { + "epoch": 136.88, + "learning_rate": 1.5780706836616454e-05, + "loss": 0.2563, + "step": 472500 + }, + { + "epoch": 137.02, + "learning_rate": 1.574449594438007e-05, + "loss": 0.3046, + "step": 473000 + }, + { + "epoch": 137.17, + "learning_rate": 1.5708285052143686e-05, + "loss": 0.2558, + "step": 473500 + }, + { + "epoch": 137.31, + "learning_rate": 1.5672074159907302e-05, + "loss": 0.2572, + "step": 474000 + }, + { + "epoch": 137.46, + "learning_rate": 1.5635863267670918e-05, + "loss": 0.2545, + "step": 474500 + }, + { + "epoch": 137.6, + "learning_rate": 1.559965237543453e-05, + "loss": 0.2851, + "step": 475000 + }, + { + "epoch": 137.75, + "learning_rate": 1.5563441483198147e-05, + "loss": 0.2573, + "step": 475500 + }, + { + "epoch": 137.89, + "learning_rate": 1.552723059096176e-05, + "loss": 0.3079, + "step": 476000 + }, + { + "epoch": 138.04, + "learning_rate": 1.549101969872538e-05, + "loss": 0.2508, + "step": 476500 + }, + { + "epoch": 138.18, + "learning_rate": 1.5454808806488992e-05, + "loss": 0.2452, + "step": 477000 + }, + { + "epoch": 138.33, + "learning_rate": 1.5418597914252608e-05, + "loss": 0.2633, + "step": 477500 + }, + { + "epoch": 138.47, + "learning_rate": 1.5382387022016224e-05, + "loss": 0.2569, + "step": 478000 + }, + { + "epoch": 138.62, + "learning_rate": 1.5346176129779837e-05, + "loss": 0.2591, + "step": 478500 + }, + { + "epoch": 138.76, + "learning_rate": 1.5309965237543453e-05, + "loss": 0.272, + "step": 479000 + }, + { + "epoch": 138.9, + "learning_rate": 1.527375434530707e-05, + "loss": 0.2495, + "step": 479500 + }, + { + "epoch": 139.05, + "learning_rate": 1.5237543453070685e-05, + "loss": 0.2988, + "step": 480000 + }, + { + "epoch": 139.19, + "learning_rate": 1.52013325608343e-05, + "loss": 0.255, + "step": 480500 + }, + { + "epoch": 139.34, + "learning_rate": 1.5165121668597914e-05, + "loss": 0.2734, + "step": 481000 + }, + { + "epoch": 139.48, + "learning_rate": 1.512891077636153e-05, + "loss": 0.2574, + "step": 481500 + }, + { + "epoch": 139.63, + "learning_rate": 1.5092699884125144e-05, + "loss": 0.2649, + "step": 482000 + }, + { + "epoch": 139.77, + "learning_rate": 1.5056488991888762e-05, + "loss": 0.2445, + "step": 482500 + }, + { + "epoch": 139.92, + "learning_rate": 1.5020278099652377e-05, + "loss": 0.2909, + "step": 483000 + }, + { + "epoch": 140.06, + "learning_rate": 1.4984067207415991e-05, + "loss": 0.2669, + "step": 483500 + }, + { + "epoch": 140.21, + "learning_rate": 1.4947856315179607e-05, + "loss": 0.2534, + "step": 484000 + }, + { + "epoch": 140.35, + "learning_rate": 1.4911645422943221e-05, + "loss": 0.2586, + "step": 484500 + }, + { + "epoch": 140.5, + "learning_rate": 1.4875434530706836e-05, + "loss": 0.2407, + "step": 485000 + }, + { + "epoch": 140.64, + "learning_rate": 1.4839223638470454e-05, + "loss": 0.2306, + "step": 485500 + }, + { + "epoch": 140.79, + "learning_rate": 1.4803012746234068e-05, + "loss": 0.2588, + "step": 486000 + }, + { + "epoch": 140.93, + "learning_rate": 1.4766801853997682e-05, + "loss": 0.249, + "step": 486500 + }, + { + "epoch": 141.08, + "learning_rate": 1.4730590961761299e-05, + "loss": 0.2863, + "step": 487000 + }, + { + "epoch": 141.22, + "learning_rate": 1.4694380069524913e-05, + "loss": 0.2298, + "step": 487500 + }, + { + "epoch": 141.37, + "learning_rate": 1.465816917728853e-05, + "loss": 0.226, + "step": 488000 + }, + { + "epoch": 141.51, + "learning_rate": 1.4621958285052145e-05, + "loss": 0.2447, + "step": 488500 + }, + { + "epoch": 141.66, + "learning_rate": 1.458574739281576e-05, + "loss": 0.2686, + "step": 489000 + }, + { + "epoch": 141.8, + "learning_rate": 1.4549536500579376e-05, + "loss": 0.2503, + "step": 489500 + }, + { + "epoch": 141.95, + "learning_rate": 1.451332560834299e-05, + "loss": 0.2746, + "step": 490000 + }, + { + "epoch": 142.09, + "learning_rate": 1.4477114716106604e-05, + "loss": 0.2151, + "step": 490500 + }, + { + "epoch": 142.24, + "learning_rate": 1.4440903823870222e-05, + "loss": 0.2437, + "step": 491000 + }, + { + "epoch": 142.38, + "learning_rate": 1.4404692931633837e-05, + "loss": 0.2303, + "step": 491500 + }, + { + "epoch": 142.53, + "learning_rate": 1.4368482039397451e-05, + "loss": 0.2443, + "step": 492000 + }, + { + "epoch": 142.67, + "learning_rate": 1.4332271147161067e-05, + "loss": 0.2637, + "step": 492500 + }, + { + "epoch": 142.82, + "learning_rate": 1.4296060254924682e-05, + "loss": 0.2514, + "step": 493000 + }, + { + "epoch": 142.96, + "learning_rate": 1.4259849362688296e-05, + "loss": 0.2576, + "step": 493500 + }, + { + "epoch": 143.11, + "learning_rate": 1.4223638470451914e-05, + "loss": 0.2524, + "step": 494000 + }, + { + "epoch": 143.25, + "learning_rate": 1.4187427578215528e-05, + "loss": 0.2366, + "step": 494500 + }, + { + "epoch": 143.4, + "learning_rate": 1.4151216685979144e-05, + "loss": 0.2462, + "step": 495000 + }, + { + "epoch": 143.54, + "learning_rate": 1.4115005793742759e-05, + "loss": 0.2367, + "step": 495500 + }, + { + "epoch": 143.68, + "learning_rate": 1.4078794901506373e-05, + "loss": 0.2321, + "step": 496000 + }, + { + "epoch": 143.83, + "learning_rate": 1.4042584009269987e-05, + "loss": 0.2467, + "step": 496500 + }, + { + "epoch": 143.97, + "learning_rate": 1.4006373117033605e-05, + "loss": 0.229, + "step": 497000 + }, + { + "epoch": 144.12, + "learning_rate": 1.397016222479722e-05, + "loss": 0.237, + "step": 497500 + }, + { + "epoch": 144.26, + "learning_rate": 1.3933951332560836e-05, + "loss": 0.2555, + "step": 498000 + }, + { + "epoch": 144.41, + "learning_rate": 1.389774044032445e-05, + "loss": 0.2545, + "step": 498500 + }, + { + "epoch": 144.55, + "learning_rate": 1.3861529548088065e-05, + "loss": 0.2443, + "step": 499000 + }, + { + "epoch": 144.7, + "learning_rate": 1.3825318655851679e-05, + "loss": 0.2723, + "step": 499500 + }, + { + "epoch": 144.84, + "learning_rate": 1.3789107763615297e-05, + "loss": 0.2444, + "step": 500000 + }, + { + "epoch": 144.99, + "learning_rate": 1.3752896871378911e-05, + "loss": 0.2326, + "step": 500500 + }, + { + "epoch": 145.13, + "learning_rate": 1.3716685979142527e-05, + "loss": 0.2243, + "step": 501000 + }, + { + "epoch": 145.28, + "learning_rate": 1.3680475086906142e-05, + "loss": 0.2313, + "step": 501500 + }, + { + "epoch": 145.42, + "learning_rate": 1.3644264194669756e-05, + "loss": 0.2395, + "step": 502000 + }, + { + "epoch": 145.57, + "learning_rate": 1.3608053302433374e-05, + "loss": 0.2327, + "step": 502500 + }, + { + "epoch": 145.71, + "learning_rate": 1.3571842410196988e-05, + "loss": 0.2246, + "step": 503000 + }, + { + "epoch": 145.86, + "learning_rate": 1.3535631517960604e-05, + "loss": 0.2169, + "step": 503500 + }, + { + "epoch": 146.0, + "learning_rate": 1.3499420625724219e-05, + "loss": 0.2386, + "step": 504000 + }, + { + "epoch": 146.15, + "learning_rate": 1.3463209733487833e-05, + "loss": 0.238, + "step": 504500 + }, + { + "epoch": 146.29, + "learning_rate": 1.3426998841251448e-05, + "loss": 0.2387, + "step": 505000 + }, + { + "epoch": 146.44, + "learning_rate": 1.3390787949015065e-05, + "loss": 0.2223, + "step": 505500 + }, + { + "epoch": 146.58, + "learning_rate": 1.335457705677868e-05, + "loss": 0.2103, + "step": 506000 + }, + { + "epoch": 146.73, + "learning_rate": 1.3318366164542296e-05, + "loss": 0.2209, + "step": 506500 + }, + { + "epoch": 146.87, + "learning_rate": 1.328215527230591e-05, + "loss": 0.2444, + "step": 507000 + }, + { + "epoch": 147.02, + "learning_rate": 1.3245944380069525e-05, + "loss": 0.2337, + "step": 507500 + }, + { + "epoch": 147.16, + "learning_rate": 1.3209733487833139e-05, + "loss": 0.2242, + "step": 508000 + }, + { + "epoch": 147.31, + "learning_rate": 1.3173522595596757e-05, + "loss": 0.2305, + "step": 508500 + }, + { + "epoch": 147.45, + "learning_rate": 1.3137311703360373e-05, + "loss": 0.216, + "step": 509000 + }, + { + "epoch": 147.6, + "learning_rate": 1.3101100811123987e-05, + "loss": 0.2132, + "step": 509500 + }, + { + "epoch": 147.74, + "learning_rate": 1.3064889918887602e-05, + "loss": 0.2571, + "step": 510000 + }, + { + "epoch": 147.89, + "learning_rate": 1.3028679026651216e-05, + "loss": 0.2287, + "step": 510500 + }, + { + "epoch": 148.03, + "learning_rate": 1.299246813441483e-05, + "loss": 0.2344, + "step": 511000 + }, + { + "epoch": 148.17, + "learning_rate": 1.2956257242178448e-05, + "loss": 0.2202, + "step": 511500 + }, + { + "epoch": 148.32, + "learning_rate": 1.2920046349942064e-05, + "loss": 0.1859, + "step": 512000 + }, + { + "epoch": 148.46, + "learning_rate": 1.2883835457705679e-05, + "loss": 0.1972, + "step": 512500 + }, + { + "epoch": 148.61, + "learning_rate": 1.2847624565469293e-05, + "loss": 0.2232, + "step": 513000 + }, + { + "epoch": 148.75, + "learning_rate": 1.2811413673232908e-05, + "loss": 0.2051, + "step": 513500 + }, + { + "epoch": 148.9, + "learning_rate": 1.2775202780996524e-05, + "loss": 0.2272, + "step": 514000 + }, + { + "epoch": 149.04, + "learning_rate": 1.273899188876014e-05, + "loss": 0.1866, + "step": 514500 + }, + { + "epoch": 149.19, + "learning_rate": 1.2702780996523756e-05, + "loss": 0.1998, + "step": 515000 + }, + { + "epoch": 149.33, + "learning_rate": 1.266657010428737e-05, + "loss": 0.1948, + "step": 515500 + }, + { + "epoch": 149.48, + "learning_rate": 1.2630359212050985e-05, + "loss": 0.2025, + "step": 516000 + }, + { + "epoch": 149.62, + "learning_rate": 1.2594148319814599e-05, + "loss": 0.2197, + "step": 516500 + }, + { + "epoch": 149.77, + "learning_rate": 1.2557937427578217e-05, + "loss": 0.2142, + "step": 517000 + }, + { + "epoch": 149.91, + "learning_rate": 1.2521726535341833e-05, + "loss": 0.2137, + "step": 517500 + }, + { + "epoch": 150.06, + "learning_rate": 1.2485515643105447e-05, + "loss": 0.2164, + "step": 518000 + }, + { + "epoch": 150.2, + "learning_rate": 1.2449304750869062e-05, + "loss": 0.1961, + "step": 518500 + }, + { + "epoch": 150.35, + "learning_rate": 1.2413093858632676e-05, + "loss": 0.2187, + "step": 519000 + }, + { + "epoch": 150.49, + "learning_rate": 1.2376882966396292e-05, + "loss": 0.1937, + "step": 519500 + }, + { + "epoch": 150.64, + "learning_rate": 1.2340672074159908e-05, + "loss": 0.2133, + "step": 520000 + }, + { + "epoch": 150.78, + "learning_rate": 1.2304461181923523e-05, + "loss": 0.2468, + "step": 520500 + }, + { + "epoch": 150.93, + "learning_rate": 1.2268250289687139e-05, + "loss": 0.2023, + "step": 521000 + }, + { + "epoch": 151.07, + "learning_rate": 1.2232039397450753e-05, + "loss": 0.2029, + "step": 521500 + }, + { + "epoch": 151.22, + "learning_rate": 1.219582850521437e-05, + "loss": 0.2073, + "step": 522000 + }, + { + "epoch": 151.36, + "learning_rate": 1.2159617612977984e-05, + "loss": 0.2003, + "step": 522500 + }, + { + "epoch": 151.51, + "learning_rate": 1.21234067207416e-05, + "loss": 0.2107, + "step": 523000 + }, + { + "epoch": 151.65, + "learning_rate": 1.2087195828505216e-05, + "loss": 0.2126, + "step": 523500 + }, + { + "epoch": 151.8, + "learning_rate": 1.205098493626883e-05, + "loss": 0.2001, + "step": 524000 + }, + { + "epoch": 151.94, + "learning_rate": 1.2014774044032445e-05, + "loss": 0.228, + "step": 524500 + }, + { + "epoch": 152.09, + "learning_rate": 1.1978563151796061e-05, + "loss": 0.2013, + "step": 525000 + }, + { + "epoch": 152.23, + "learning_rate": 1.1942352259559677e-05, + "loss": 0.2065, + "step": 525500 + }, + { + "epoch": 152.38, + "learning_rate": 1.1906141367323291e-05, + "loss": 0.1917, + "step": 526000 + }, + { + "epoch": 152.52, + "learning_rate": 1.1869930475086907e-05, + "loss": 0.2089, + "step": 526500 + }, + { + "epoch": 152.67, + "learning_rate": 1.1833719582850522e-05, + "loss": 0.1965, + "step": 527000 + }, + { + "epoch": 152.81, + "learning_rate": 1.1797508690614136e-05, + "loss": 0.2153, + "step": 527500 + }, + { + "epoch": 152.95, + "learning_rate": 1.1761297798377752e-05, + "loss": 0.1984, + "step": 528000 + }, + { + "epoch": 153.1, + "learning_rate": 1.1725086906141368e-05, + "loss": 0.1944, + "step": 528500 + }, + { + "epoch": 153.24, + "learning_rate": 1.1688876013904983e-05, + "loss": 0.2069, + "step": 529000 + }, + { + "epoch": 153.39, + "learning_rate": 1.1652665121668599e-05, + "loss": 0.1943, + "step": 529500 + }, + { + "epoch": 153.53, + "learning_rate": 1.1616454229432213e-05, + "loss": 0.1839, + "step": 530000 + }, + { + "epoch": 153.68, + "learning_rate": 1.1580243337195828e-05, + "loss": 0.2062, + "step": 530500 + }, + { + "epoch": 153.82, + "learning_rate": 1.1544032444959446e-05, + "loss": 0.2067, + "step": 531000 + }, + { + "epoch": 153.97, + "learning_rate": 1.150782155272306e-05, + "loss": 0.2113, + "step": 531500 + }, + { + "epoch": 154.11, + "learning_rate": 1.1471610660486674e-05, + "loss": 0.2214, + "step": 532000 + }, + { + "epoch": 154.26, + "learning_rate": 1.143539976825029e-05, + "loss": 0.203, + "step": 532500 + }, + { + "epoch": 154.4, + "learning_rate": 1.1399188876013905e-05, + "loss": 0.1725, + "step": 533000 + }, + { + "epoch": 154.55, + "learning_rate": 1.1362977983777521e-05, + "loss": 0.1898, + "step": 533500 + }, + { + "epoch": 154.69, + "learning_rate": 1.1326767091541137e-05, + "loss": 0.2152, + "step": 534000 + }, + { + "epoch": 154.84, + "learning_rate": 1.1290556199304751e-05, + "loss": 0.1984, + "step": 534500 + }, + { + "epoch": 154.98, + "learning_rate": 1.1254345307068366e-05, + "loss": 0.2074, + "step": 535000 + }, + { + "epoch": 155.13, + "learning_rate": 1.1218134414831982e-05, + "loss": 0.203, + "step": 535500 + }, + { + "epoch": 155.27, + "learning_rate": 1.1181923522595596e-05, + "loss": 0.2135, + "step": 536000 + }, + { + "epoch": 155.42, + "learning_rate": 1.1145712630359212e-05, + "loss": 0.191, + "step": 536500 + }, + { + "epoch": 155.56, + "learning_rate": 1.1109501738122829e-05, + "loss": 0.1882, + "step": 537000 + }, + { + "epoch": 155.71, + "learning_rate": 1.1073290845886443e-05, + "loss": 0.1999, + "step": 537500 + }, + { + "epoch": 155.85, + "learning_rate": 1.1037079953650059e-05, + "loss": 0.1769, + "step": 538000 + }, + { + "epoch": 156.0, + "learning_rate": 1.1000869061413673e-05, + "loss": 0.1962, + "step": 538500 + }, + { + "epoch": 156.14, + "learning_rate": 1.096465816917729e-05, + "loss": 0.1978, + "step": 539000 + }, + { + "epoch": 156.29, + "learning_rate": 1.0928447276940906e-05, + "loss": 0.1693, + "step": 539500 + }, + { + "epoch": 156.43, + "learning_rate": 1.089223638470452e-05, + "loss": 0.1777, + "step": 540000 + }, + { + "epoch": 156.58, + "learning_rate": 1.0856025492468134e-05, + "loss": 0.1803, + "step": 540500 + }, + { + "epoch": 156.72, + "learning_rate": 1.081981460023175e-05, + "loss": 0.1683, + "step": 541000 + }, + { + "epoch": 156.87, + "learning_rate": 1.0783603707995365e-05, + "loss": 0.1918, + "step": 541500 + }, + { + "epoch": 157.01, + "learning_rate": 1.0747392815758981e-05, + "loss": 0.1938, + "step": 542000 + }, + { + "epoch": 157.16, + "learning_rate": 1.0711181923522597e-05, + "loss": 0.1879, + "step": 542500 + }, + { + "epoch": 157.3, + "learning_rate": 1.0674971031286211e-05, + "loss": 0.1927, + "step": 543000 + }, + { + "epoch": 157.44, + "learning_rate": 1.0638760139049826e-05, + "loss": 0.1724, + "step": 543500 + }, + { + "epoch": 157.59, + "learning_rate": 1.0602549246813442e-05, + "loss": 0.1943, + "step": 544000 + }, + { + "epoch": 157.73, + "learning_rate": 1.0566338354577056e-05, + "loss": 0.172, + "step": 544500 + }, + { + "epoch": 157.88, + "learning_rate": 1.0530127462340672e-05, + "loss": 0.1979, + "step": 545000 + }, + { + "epoch": 158.02, + "learning_rate": 1.0493916570104289e-05, + "loss": 0.2234, + "step": 545500 + }, + { + "epoch": 158.17, + "learning_rate": 1.0457705677867903e-05, + "loss": 0.1712, + "step": 546000 + }, + { + "epoch": 158.31, + "learning_rate": 1.0421494785631517e-05, + "loss": 0.2199, + "step": 546500 + }, + { + "epoch": 158.46, + "learning_rate": 1.0385283893395133e-05, + "loss": 0.1846, + "step": 547000 + }, + { + "epoch": 158.6, + "learning_rate": 1.034907300115875e-05, + "loss": 0.1832, + "step": 547500 + }, + { + "epoch": 158.75, + "learning_rate": 1.0312862108922364e-05, + "loss": 0.1684, + "step": 548000 + }, + { + "epoch": 158.89, + "learning_rate": 1.027665121668598e-05, + "loss": 0.1741, + "step": 548500 + }, + { + "epoch": 159.04, + "learning_rate": 1.0240440324449594e-05, + "loss": 0.1623, + "step": 549000 + }, + { + "epoch": 159.18, + "learning_rate": 1.0204229432213209e-05, + "loss": 0.1776, + "step": 549500 + }, + { + "epoch": 159.33, + "learning_rate": 1.0168018539976825e-05, + "loss": 0.1863, + "step": 550000 + }, + { + "epoch": 159.47, + "learning_rate": 1.0131807647740441e-05, + "loss": 0.1685, + "step": 550500 + }, + { + "epoch": 159.62, + "learning_rate": 1.0095596755504057e-05, + "loss": 0.1973, + "step": 551000 + }, + { + "epoch": 159.76, + "learning_rate": 1.0059385863267672e-05, + "loss": 0.1711, + "step": 551500 + }, + { + "epoch": 159.91, + "learning_rate": 1.0023174971031286e-05, + "loss": 0.1814, + "step": 552000 + }, + { + "epoch": 160.05, + "learning_rate": 9.986964078794902e-06, + "loss": 0.167, + "step": 552500 + }, + { + "epoch": 160.2, + "learning_rate": 9.950753186558518e-06, + "loss": 0.1938, + "step": 553000 + }, + { + "epoch": 160.34, + "learning_rate": 9.914542294322133e-06, + "loss": 0.167, + "step": 553500 + }, + { + "epoch": 160.49, + "learning_rate": 9.878331402085749e-06, + "loss": 0.1617, + "step": 554000 + }, + { + "epoch": 160.63, + "learning_rate": 9.842120509849363e-06, + "loss": 0.172, + "step": 554500 + }, + { + "epoch": 160.78, + "learning_rate": 9.805909617612977e-06, + "loss": 0.1737, + "step": 555000 + }, + { + "epoch": 160.92, + "learning_rate": 9.769698725376594e-06, + "loss": 0.1816, + "step": 555500 + }, + { + "epoch": 161.07, + "learning_rate": 9.73348783314021e-06, + "loss": 0.1628, + "step": 556000 + }, + { + "epoch": 161.21, + "learning_rate": 9.697276940903824e-06, + "loss": 0.1696, + "step": 556500 + }, + { + "epoch": 161.36, + "learning_rate": 9.66106604866744e-06, + "loss": 0.1656, + "step": 557000 + }, + { + "epoch": 161.5, + "learning_rate": 9.624855156431055e-06, + "loss": 0.1559, + "step": 557500 + }, + { + "epoch": 161.65, + "learning_rate": 9.588644264194669e-06, + "loss": 0.172, + "step": 558000 + }, + { + "epoch": 161.79, + "learning_rate": 9.552433371958287e-06, + "loss": 0.1878, + "step": 558500 + }, + { + "epoch": 161.94, + "learning_rate": 9.516222479721901e-06, + "loss": 0.1682, + "step": 559000 + }, + { + "epoch": 162.08, + "learning_rate": 9.480011587485516e-06, + "loss": 0.1755, + "step": 559500 + }, + { + "epoch": 162.22, + "learning_rate": 9.443800695249132e-06, + "loss": 0.2092, + "step": 560000 + }, + { + "epoch": 162.37, + "learning_rate": 9.407589803012746e-06, + "loss": 0.1784, + "step": 560500 + }, + { + "epoch": 162.51, + "learning_rate": 9.371378910776362e-06, + "loss": 0.154, + "step": 561000 + }, + { + "epoch": 162.66, + "learning_rate": 9.335168018539978e-06, + "loss": 0.1892, + "step": 561500 + }, + { + "epoch": 162.8, + "learning_rate": 9.298957126303593e-06, + "loss": 0.1679, + "step": 562000 + }, + { + "epoch": 162.95, + "learning_rate": 9.262746234067207e-06, + "loss": 0.1699, + "step": 562500 + }, + { + "epoch": 163.09, + "learning_rate": 9.226535341830823e-06, + "loss": 0.1553, + "step": 563000 + }, + { + "epoch": 163.24, + "learning_rate": 9.190324449594438e-06, + "loss": 0.1792, + "step": 563500 + }, + { + "epoch": 163.38, + "learning_rate": 9.154113557358054e-06, + "loss": 0.1611, + "step": 564000 + }, + { + "epoch": 163.53, + "learning_rate": 9.11790266512167e-06, + "loss": 0.1682, + "step": 564500 + }, + { + "epoch": 163.67, + "learning_rate": 9.081691772885284e-06, + "loss": 0.1765, + "step": 565000 + }, + { + "epoch": 163.82, + "learning_rate": 9.0454808806489e-06, + "loss": 0.1928, + "step": 565500 + }, + { + "epoch": 163.96, + "learning_rate": 9.009269988412515e-06, + "loss": 0.1758, + "step": 566000 + }, + { + "epoch": 164.11, + "learning_rate": 8.97305909617613e-06, + "loss": 0.1599, + "step": 566500 + }, + { + "epoch": 164.25, + "learning_rate": 8.936848203939747e-06, + "loss": 0.1793, + "step": 567000 + }, + { + "epoch": 164.4, + "learning_rate": 8.900637311703361e-06, + "loss": 0.151, + "step": 567500 + }, + { + "epoch": 164.54, + "learning_rate": 8.864426419466976e-06, + "loss": 0.1545, + "step": 568000 + }, + { + "epoch": 164.69, + "learning_rate": 8.828215527230592e-06, + "loss": 0.1745, + "step": 568500 + }, + { + "epoch": 164.83, + "learning_rate": 8.792004634994206e-06, + "loss": 0.175, + "step": 569000 + }, + { + "epoch": 164.98, + "learning_rate": 8.755793742757822e-06, + "loss": 0.1685, + "step": 569500 + }, + { + "epoch": 165.12, + "learning_rate": 8.719582850521438e-06, + "loss": 0.1714, + "step": 570000 + }, + { + "epoch": 165.27, + "learning_rate": 8.683371958285053e-06, + "loss": 0.1596, + "step": 570500 + }, + { + "epoch": 165.41, + "learning_rate": 8.647161066048667e-06, + "loss": 0.1514, + "step": 571000 + }, + { + "epoch": 165.56, + "learning_rate": 8.610950173812283e-06, + "loss": 0.1698, + "step": 571500 + }, + { + "epoch": 165.7, + "learning_rate": 8.574739281575898e-06, + "loss": 0.1652, + "step": 572000 + }, + { + "epoch": 165.85, + "learning_rate": 8.538528389339514e-06, + "loss": 0.1615, + "step": 572500 + }, + { + "epoch": 165.99, + "learning_rate": 8.50231749710313e-06, + "loss": 0.1452, + "step": 573000 + }, + { + "epoch": 166.14, + "learning_rate": 8.466106604866744e-06, + "loss": 0.1636, + "step": 573500 + }, + { + "epoch": 166.28, + "learning_rate": 8.429895712630359e-06, + "loss": 0.165, + "step": 574000 + }, + { + "epoch": 166.43, + "learning_rate": 8.393684820393975e-06, + "loss": 0.1494, + "step": 574500 + }, + { + "epoch": 166.57, + "learning_rate": 8.35747392815759e-06, + "loss": 0.1576, + "step": 575000 + }, + { + "epoch": 166.71, + "learning_rate": 8.321263035921205e-06, + "loss": 0.1632, + "step": 575500 + }, + { + "epoch": 166.86, + "learning_rate": 8.285052143684821e-06, + "loss": 0.16, + "step": 576000 + }, + { + "epoch": 167.0, + "learning_rate": 8.248841251448436e-06, + "loss": 0.1682, + "step": 576500 + }, + { + "epoch": 167.15, + "learning_rate": 8.21263035921205e-06, + "loss": 0.1602, + "step": 577000 + }, + { + "epoch": 167.29, + "learning_rate": 8.176419466975666e-06, + "loss": 0.1463, + "step": 577500 + }, + { + "epoch": 167.44, + "learning_rate": 8.140208574739282e-06, + "loss": 0.1599, + "step": 578000 + }, + { + "epoch": 167.58, + "learning_rate": 8.103997682502898e-06, + "loss": 0.1446, + "step": 578500 + }, + { + "epoch": 167.73, + "learning_rate": 8.067786790266513e-06, + "loss": 0.1541, + "step": 579000 + }, + { + "epoch": 167.87, + "learning_rate": 8.031575898030127e-06, + "loss": 0.1572, + "step": 579500 + }, + { + "epoch": 168.02, + "learning_rate": 7.995365005793743e-06, + "loss": 0.1458, + "step": 580000 + }, + { + "epoch": 168.16, + "learning_rate": 7.95915411355736e-06, + "loss": 0.1426, + "step": 580500 + }, + { + "epoch": 168.31, + "learning_rate": 7.922943221320974e-06, + "loss": 0.1396, + "step": 581000 + }, + { + "epoch": 168.45, + "learning_rate": 7.88673232908459e-06, + "loss": 0.1583, + "step": 581500 + }, + { + "epoch": 168.6, + "learning_rate": 7.850521436848204e-06, + "loss": 0.1841, + "step": 582000 + }, + { + "epoch": 168.74, + "learning_rate": 7.814310544611819e-06, + "loss": 0.1544, + "step": 582500 + }, + { + "epoch": 168.89, + "learning_rate": 7.778099652375435e-06, + "loss": 0.167, + "step": 583000 + }, + { + "epoch": 169.03, + "learning_rate": 7.741888760139051e-06, + "loss": 0.1597, + "step": 583500 + }, + { + "epoch": 169.18, + "learning_rate": 7.705677867902665e-06, + "loss": 0.1523, + "step": 584000 + }, + { + "epoch": 169.32, + "learning_rate": 7.669466975666281e-06, + "loss": 0.1497, + "step": 584500 + }, + { + "epoch": 169.47, + "learning_rate": 7.633256083429896e-06, + "loss": 0.1541, + "step": 585000 + }, + { + "epoch": 169.61, + "learning_rate": 7.597045191193511e-06, + "loss": 0.1471, + "step": 585500 + }, + { + "epoch": 169.76, + "learning_rate": 7.560834298957127e-06, + "loss": 0.1812, + "step": 586000 + }, + { + "epoch": 169.9, + "learning_rate": 7.5246234067207415e-06, + "loss": 0.1556, + "step": 586500 + }, + { + "epoch": 170.05, + "learning_rate": 7.488412514484357e-06, + "loss": 0.1353, + "step": 587000 + }, + { + "epoch": 170.19, + "learning_rate": 7.452201622247973e-06, + "loss": 0.1334, + "step": 587500 + }, + { + "epoch": 170.34, + "learning_rate": 7.415990730011588e-06, + "loss": 0.1455, + "step": 588000 + }, + { + "epoch": 170.48, + "learning_rate": 7.3797798377752025e-06, + "loss": 0.1629, + "step": 588500 + }, + { + "epoch": 170.63, + "learning_rate": 7.343568945538819e-06, + "loss": 0.1388, + "step": 589000 + }, + { + "epoch": 170.77, + "learning_rate": 7.307358053302434e-06, + "loss": 0.159, + "step": 589500 + }, + { + "epoch": 170.92, + "learning_rate": 7.271147161066048e-06, + "loss": 0.157, + "step": 590000 + }, + { + "epoch": 171.06, + "learning_rate": 7.234936268829664e-06, + "loss": 0.1471, + "step": 590500 + }, + { + "epoch": 171.21, + "learning_rate": 7.19872537659328e-06, + "loss": 0.1879, + "step": 591000 + }, + { + "epoch": 171.35, + "learning_rate": 7.162514484356894e-06, + "loss": 0.1651, + "step": 591500 + }, + { + "epoch": 171.49, + "learning_rate": 7.12630359212051e-06, + "loss": 0.1657, + "step": 592000 + }, + { + "epoch": 171.64, + "learning_rate": 7.090092699884125e-06, + "loss": 0.1521, + "step": 592500 + }, + { + "epoch": 171.78, + "learning_rate": 7.0538818076477414e-06, + "loss": 0.1404, + "step": 593000 + }, + { + "epoch": 171.93, + "learning_rate": 7.017670915411356e-06, + "loss": 0.138, + "step": 593500 + }, + { + "epoch": 172.07, + "learning_rate": 6.981460023174971e-06, + "loss": 0.1545, + "step": 594000 + }, + { + "epoch": 172.22, + "learning_rate": 6.945249130938587e-06, + "loss": 0.1472, + "step": 594500 + }, + { + "epoch": 172.36, + "learning_rate": 6.9090382387022024e-06, + "loss": 0.1593, + "step": 595000 + }, + { + "epoch": 172.51, + "learning_rate": 6.872827346465817e-06, + "loss": 0.1461, + "step": 595500 + }, + { + "epoch": 172.65, + "learning_rate": 6.836616454229433e-06, + "loss": 0.1279, + "step": 596000 + }, + { + "epoch": 172.8, + "learning_rate": 6.800405561993048e-06, + "loss": 0.135, + "step": 596500 + }, + { + "epoch": 172.94, + "learning_rate": 6.764194669756663e-06, + "loss": 0.14, + "step": 597000 + }, + { + "epoch": 173.09, + "learning_rate": 6.727983777520279e-06, + "loss": 0.1307, + "step": 597500 + }, + { + "epoch": 173.23, + "learning_rate": 6.691772885283894e-06, + "loss": 0.1356, + "step": 598000 + }, + { + "epoch": 173.38, + "learning_rate": 6.655561993047508e-06, + "loss": 0.1466, + "step": 598500 + }, + { + "epoch": 173.52, + "learning_rate": 6.6193511008111244e-06, + "loss": 0.1361, + "step": 599000 + }, + { + "epoch": 173.67, + "learning_rate": 6.58314020857474e-06, + "loss": 0.1336, + "step": 599500 + }, + { + "epoch": 173.81, + "learning_rate": 6.546929316338354e-06, + "loss": 0.129, + "step": 600000 + }, + { + "epoch": 173.96, + "learning_rate": 6.51071842410197e-06, + "loss": 0.1637, + "step": 600500 + }, + { + "epoch": 174.1, + "learning_rate": 6.474507531865585e-06, + "loss": 0.1632, + "step": 601000 + }, + { + "epoch": 174.25, + "learning_rate": 6.4382966396292e-06, + "loss": 0.151, + "step": 601500 + }, + { + "epoch": 174.39, + "learning_rate": 6.402085747392817e-06, + "loss": 0.1487, + "step": 602000 + }, + { + "epoch": 174.54, + "learning_rate": 6.365874855156431e-06, + "loss": 0.1335, + "step": 602500 + }, + { + "epoch": 174.68, + "learning_rate": 6.329663962920046e-06, + "loss": 0.1338, + "step": 603000 + }, + { + "epoch": 174.83, + "learning_rate": 6.2934530706836625e-06, + "loss": 0.1461, + "step": 603500 + }, + { + "epoch": 174.97, + "learning_rate": 6.257242178447277e-06, + "loss": 0.1413, + "step": 604000 + }, + { + "epoch": 175.12, + "learning_rate": 6.221031286210892e-06, + "loss": 0.1545, + "step": 604500 + }, + { + "epoch": 175.26, + "learning_rate": 6.184820393974508e-06, + "loss": 0.1615, + "step": 605000 + }, + { + "epoch": 175.41, + "learning_rate": 6.148609501738123e-06, + "loss": 0.1325, + "step": 605500 + }, + { + "epoch": 175.55, + "learning_rate": 6.112398609501739e-06, + "loss": 0.1398, + "step": 606000 + }, + { + "epoch": 175.7, + "learning_rate": 6.076187717265354e-06, + "loss": 0.1413, + "step": 606500 + }, + { + "epoch": 175.84, + "learning_rate": 6.039976825028968e-06, + "loss": 0.159, + "step": 607000 + }, + { + "epoch": 175.98, + "learning_rate": 6.0037659327925845e-06, + "loss": 0.1591, + "step": 607500 + }, + { + "epoch": 176.13, + "learning_rate": 5.9675550405562e-06, + "loss": 0.141, + "step": 608000 + }, + { + "epoch": 176.27, + "learning_rate": 5.931344148319814e-06, + "loss": 0.1246, + "step": 608500 + }, + { + "epoch": 176.42, + "learning_rate": 5.89513325608343e-06, + "loss": 0.1274, + "step": 609000 + }, + { + "epoch": 176.56, + "learning_rate": 5.8589223638470455e-06, + "loss": 0.152, + "step": 609500 + }, + { + "epoch": 176.71, + "learning_rate": 5.822711471610661e-06, + "loss": 0.159, + "step": 610000 + }, + { + "epoch": 176.85, + "learning_rate": 5.786500579374276e-06, + "loss": 0.1337, + "step": 610500 + }, + { + "epoch": 177.0, + "learning_rate": 5.750289687137891e-06, + "loss": 0.1248, + "step": 611000 + }, + { + "epoch": 177.14, + "learning_rate": 5.7140787949015065e-06, + "loss": 0.1375, + "step": 611500 + }, + { + "epoch": 177.29, + "learning_rate": 5.677867902665122e-06, + "loss": 0.1386, + "step": 612000 + }, + { + "epoch": 177.43, + "learning_rate": 5.641657010428737e-06, + "loss": 0.1376, + "step": 612500 + }, + { + "epoch": 177.58, + "learning_rate": 5.605446118192353e-06, + "loss": 0.1288, + "step": 613000 + }, + { + "epoch": 177.72, + "learning_rate": 5.5692352259559675e-06, + "loss": 0.1624, + "step": 613500 + }, + { + "epoch": 177.87, + "learning_rate": 5.533024333719583e-06, + "loss": 0.1262, + "step": 614000 + }, + { + "epoch": 178.01, + "learning_rate": 5.496813441483199e-06, + "loss": 0.1688, + "step": 614500 + }, + { + "epoch": 178.16, + "learning_rate": 5.460602549246813e-06, + "loss": 0.1789, + "step": 615000 + }, + { + "epoch": 178.3, + "learning_rate": 5.4243916570104285e-06, + "loss": 0.1338, + "step": 615500 + }, + { + "epoch": 178.45, + "learning_rate": 5.3881807647740446e-06, + "loss": 0.14, + "step": 616000 + }, + { + "epoch": 178.59, + "learning_rate": 5.35196987253766e-06, + "loss": 0.1236, + "step": 616500 + }, + { + "epoch": 178.74, + "learning_rate": 5.315758980301275e-06, + "loss": 0.1449, + "step": 617000 + }, + { + "epoch": 178.88, + "learning_rate": 5.27954808806489e-06, + "loss": 0.129, + "step": 617500 + }, + { + "epoch": 179.03, + "learning_rate": 5.2433371958285056e-06, + "loss": 0.1373, + "step": 618000 + }, + { + "epoch": 179.17, + "learning_rate": 5.207126303592121e-06, + "loss": 0.121, + "step": 618500 + }, + { + "epoch": 179.32, + "learning_rate": 5.170915411355736e-06, + "loss": 0.1285, + "step": 619000 + }, + { + "epoch": 179.46, + "learning_rate": 5.134704519119351e-06, + "loss": 0.1462, + "step": 619500 + }, + { + "epoch": 179.61, + "learning_rate": 5.0984936268829666e-06, + "loss": 0.1212, + "step": 620000 + }, + { + "epoch": 179.75, + "learning_rate": 5.062282734646582e-06, + "loss": 0.138, + "step": 620500 + }, + { + "epoch": 179.9, + "learning_rate": 5.026071842410197e-06, + "loss": 0.1686, + "step": 621000 + }, + { + "epoch": 180.04, + "learning_rate": 4.989860950173812e-06, + "loss": 0.1319, + "step": 621500 + }, + { + "epoch": 180.19, + "learning_rate": 4.9536500579374276e-06, + "loss": 0.1327, + "step": 622000 + }, + { + "epoch": 180.33, + "learning_rate": 4.917439165701044e-06, + "loss": 0.1409, + "step": 622500 + }, + { + "epoch": 180.48, + "learning_rate": 4.881228273464659e-06, + "loss": 0.1282, + "step": 623000 + }, + { + "epoch": 180.62, + "learning_rate": 4.845017381228273e-06, + "loss": 0.1284, + "step": 623500 + }, + { + "epoch": 180.76, + "learning_rate": 4.808806488991889e-06, + "loss": 0.1314, + "step": 624000 + }, + { + "epoch": 180.91, + "learning_rate": 4.772595596755505e-06, + "loss": 0.1163, + "step": 624500 + }, + { + "epoch": 181.05, + "learning_rate": 4.736384704519119e-06, + "loss": 0.1055, + "step": 625000 + }, + { + "epoch": 181.2, + "learning_rate": 4.700173812282735e-06, + "loss": 0.1329, + "step": 625500 + }, + { + "epoch": 181.34, + "learning_rate": 4.66396292004635e-06, + "loss": 0.1317, + "step": 626000 + }, + { + "epoch": 181.49, + "learning_rate": 4.627752027809966e-06, + "loss": 0.1612, + "step": 626500 + }, + { + "epoch": 181.63, + "learning_rate": 4.591541135573581e-06, + "loss": 0.1269, + "step": 627000 + }, + { + "epoch": 181.78, + "learning_rate": 4.555330243337196e-06, + "loss": 0.1404, + "step": 627500 + }, + { + "epoch": 181.92, + "learning_rate": 4.519119351100811e-06, + "loss": 0.1434, + "step": 628000 + }, + { + "epoch": 182.07, + "learning_rate": 4.482908458864427e-06, + "loss": 0.1427, + "step": 628500 + }, + { + "epoch": 182.21, + "learning_rate": 4.446697566628042e-06, + "loss": 0.0988, + "step": 629000 + }, + { + "epoch": 182.36, + "learning_rate": 4.410486674391657e-06, + "loss": 0.1395, + "step": 629500 + }, + { + "epoch": 182.5, + "learning_rate": 4.374275782155272e-06, + "loss": 0.1365, + "step": 630000 + }, + { + "epoch": 182.65, + "learning_rate": 4.338064889918888e-06, + "loss": 0.1285, + "step": 630500 + }, + { + "epoch": 182.79, + "learning_rate": 4.301853997682504e-06, + "loss": 0.1275, + "step": 631000 + }, + { + "epoch": 182.94, + "learning_rate": 4.265643105446118e-06, + "loss": 0.1411, + "step": 631500 + }, + { + "epoch": 183.08, + "learning_rate": 4.229432213209733e-06, + "loss": 0.1175, + "step": 632000 + }, + { + "epoch": 183.23, + "learning_rate": 4.1932213209733495e-06, + "loss": 0.1253, + "step": 632500 + }, + { + "epoch": 183.37, + "learning_rate": 4.157010428736964e-06, + "loss": 0.1131, + "step": 633000 + }, + { + "epoch": 183.52, + "learning_rate": 4.12079953650058e-06, + "loss": 0.1263, + "step": 633500 + }, + { + "epoch": 183.66, + "learning_rate": 4.084588644264195e-06, + "loss": 0.1473, + "step": 634000 + }, + { + "epoch": 183.81, + "learning_rate": 4.04837775202781e-06, + "loss": 0.1214, + "step": 634500 + }, + { + "epoch": 183.95, + "learning_rate": 4.012166859791426e-06, + "loss": 0.1385, + "step": 635000 + }, + { + "epoch": 184.1, + "learning_rate": 3.975955967555041e-06, + "loss": 0.1243, + "step": 635500 + }, + { + "epoch": 184.24, + "learning_rate": 3.939745075318655e-06, + "loss": 0.1299, + "step": 636000 + }, + { + "epoch": 184.39, + "learning_rate": 3.9035341830822715e-06, + "loss": 0.1384, + "step": 636500 + }, + { + "epoch": 184.53, + "learning_rate": 3.867323290845887e-06, + "loss": 0.1267, + "step": 637000 + }, + { + "epoch": 184.68, + "learning_rate": 3.831112398609502e-06, + "loss": 0.1584, + "step": 637500 + }, + { + "epoch": 184.82, + "learning_rate": 3.794901506373117e-06, + "loss": 0.1168, + "step": 638000 + }, + { + "epoch": 184.97, + "learning_rate": 3.7586906141367325e-06, + "loss": 0.1163, + "step": 638500 + }, + { + "epoch": 185.11, + "learning_rate": 3.722479721900348e-06, + "loss": 0.1161, + "step": 639000 + }, + { + "epoch": 185.25, + "learning_rate": 3.686268829663963e-06, + "loss": 0.1268, + "step": 639500 + }, + { + "epoch": 185.4, + "learning_rate": 3.650057937427578e-06, + "loss": 0.1185, + "step": 640000 + }, + { + "epoch": 185.54, + "learning_rate": 3.613847045191194e-06, + "loss": 0.111, + "step": 640500 + }, + { + "epoch": 185.69, + "learning_rate": 3.5776361529548087e-06, + "loss": 0.1406, + "step": 641000 + }, + { + "epoch": 185.83, + "learning_rate": 3.5414252607184244e-06, + "loss": 0.1247, + "step": 641500 + }, + { + "epoch": 185.98, + "learning_rate": 3.5052143684820396e-06, + "loss": 0.1106, + "step": 642000 + }, + { + "epoch": 186.12, + "learning_rate": 3.4690034762456544e-06, + "loss": 0.1318, + "step": 642500 + }, + { + "epoch": 186.27, + "learning_rate": 3.43279258400927e-06, + "loss": 0.1082, + "step": 643000 + }, + { + "epoch": 186.41, + "learning_rate": 3.3965816917728854e-06, + "loss": 0.1264, + "step": 643500 + }, + { + "epoch": 186.56, + "learning_rate": 3.360370799536501e-06, + "loss": 0.1201, + "step": 644000 + }, + { + "epoch": 186.7, + "learning_rate": 3.324159907300116e-06, + "loss": 0.1267, + "step": 644500 + }, + { + "epoch": 186.85, + "learning_rate": 3.2879490150637315e-06, + "loss": 0.1309, + "step": 645000 + }, + { + "epoch": 186.99, + "learning_rate": 3.2517381228273468e-06, + "loss": 0.1151, + "step": 645500 + }, + { + "epoch": 187.14, + "learning_rate": 3.2155272305909616e-06, + "loss": 0.1188, + "step": 646000 + }, + { + "epoch": 187.28, + "learning_rate": 3.1793163383545773e-06, + "loss": 0.1265, + "step": 646500 + }, + { + "epoch": 187.43, + "learning_rate": 3.143105446118193e-06, + "loss": 0.119, + "step": 647000 + }, + { + "epoch": 187.57, + "learning_rate": 3.1068945538818078e-06, + "loss": 0.1231, + "step": 647500 + }, + { + "epoch": 187.72, + "learning_rate": 3.070683661645423e-06, + "loss": 0.1197, + "step": 648000 + }, + { + "epoch": 187.86, + "learning_rate": 3.0344727694090383e-06, + "loss": 0.1276, + "step": 648500 + }, + { + "epoch": 188.01, + "learning_rate": 2.998261877172654e-06, + "loss": 0.1248, + "step": 649000 + }, + { + "epoch": 188.15, + "learning_rate": 2.9620509849362688e-06, + "loss": 0.1389, + "step": 649500 + }, + { + "epoch": 188.3, + "learning_rate": 2.9258400926998844e-06, + "loss": 0.1192, + "step": 650000 + }, + { + "epoch": 188.44, + "learning_rate": 2.8896292004634997e-06, + "loss": 0.1129, + "step": 650500 + }, + { + "epoch": 188.59, + "learning_rate": 2.853418308227115e-06, + "loss": 0.1014, + "step": 651000 + }, + { + "epoch": 188.73, + "learning_rate": 2.81720741599073e-06, + "loss": 0.132, + "step": 651500 + }, + { + "epoch": 188.88, + "learning_rate": 2.7809965237543454e-06, + "loss": 0.1383, + "step": 652000 + }, + { + "epoch": 189.02, + "learning_rate": 2.744785631517961e-06, + "loss": 0.1387, + "step": 652500 + }, + { + "epoch": 189.17, + "learning_rate": 2.708574739281576e-06, + "loss": 0.1105, + "step": 653000 + }, + { + "epoch": 189.31, + "learning_rate": 2.672363847045191e-06, + "loss": 0.1212, + "step": 653500 + }, + { + "epoch": 189.46, + "learning_rate": 2.636152954808807e-06, + "loss": 0.1115, + "step": 654000 + }, + { + "epoch": 189.6, + "learning_rate": 2.599942062572422e-06, + "loss": 0.1256, + "step": 654500 + }, + { + "epoch": 189.75, + "learning_rate": 2.563731170336037e-06, + "loss": 0.1287, + "step": 655000 + }, + { + "epoch": 189.89, + "learning_rate": 2.5275202780996526e-06, + "loss": 0.117, + "step": 655500 + }, + { + "epoch": 190.03, + "learning_rate": 2.491309385863268e-06, + "loss": 0.1064, + "step": 656000 + }, + { + "epoch": 190.18, + "learning_rate": 2.455098493626883e-06, + "loss": 0.1338, + "step": 656500 + }, + { + "epoch": 190.32, + "learning_rate": 2.4188876013904983e-06, + "loss": 0.1168, + "step": 657000 + }, + { + "epoch": 190.47, + "learning_rate": 2.3826767091541136e-06, + "loss": 0.1258, + "step": 657500 + }, + { + "epoch": 190.61, + "learning_rate": 2.3464658169177293e-06, + "loss": 0.1021, + "step": 658000 + }, + { + "epoch": 190.76, + "learning_rate": 2.310254924681344e-06, + "loss": 0.1199, + "step": 658500 + }, + { + "epoch": 190.9, + "learning_rate": 2.2740440324449593e-06, + "loss": 0.1065, + "step": 659000 + }, + { + "epoch": 191.05, + "learning_rate": 2.237833140208575e-06, + "loss": 0.1153, + "step": 659500 + }, + { + "epoch": 191.19, + "learning_rate": 2.2016222479721903e-06, + "loss": 0.1176, + "step": 660000 + }, + { + "epoch": 191.34, + "learning_rate": 2.1654113557358055e-06, + "loss": 0.1151, + "step": 660500 + }, + { + "epoch": 191.48, + "learning_rate": 2.1292004634994208e-06, + "loss": 0.1117, + "step": 661000 + }, + { + "epoch": 191.63, + "learning_rate": 2.092989571263036e-06, + "loss": 0.1226, + "step": 661500 + }, + { + "epoch": 191.77, + "learning_rate": 2.0567786790266513e-06, + "loss": 0.0943, + "step": 662000 + }, + { + "epoch": 191.92, + "learning_rate": 2.0205677867902665e-06, + "loss": 0.1238, + "step": 662500 + }, + { + "epoch": 192.06, + "learning_rate": 1.984356894553882e-06, + "loss": 0.1108, + "step": 663000 + }, + { + "epoch": 192.21, + "learning_rate": 1.9481460023174974e-06, + "loss": 0.1135, + "step": 663500 + }, + { + "epoch": 192.35, + "learning_rate": 1.9119351100811122e-06, + "loss": 0.1023, + "step": 664000 + }, + { + "epoch": 192.5, + "learning_rate": 1.875724217844728e-06, + "loss": 0.107, + "step": 664500 + }, + { + "epoch": 192.64, + "learning_rate": 1.8395133256083432e-06, + "loss": 0.1089, + "step": 665000 + }, + { + "epoch": 192.79, + "learning_rate": 1.8033024333719582e-06, + "loss": 0.104, + "step": 665500 + }, + { + "epoch": 192.93, + "learning_rate": 1.7670915411355737e-06, + "loss": 0.1172, + "step": 666000 + }, + { + "epoch": 193.08, + "learning_rate": 1.730880648899189e-06, + "loss": 0.106, + "step": 666500 + }, + { + "epoch": 193.22, + "learning_rate": 1.6946697566628044e-06, + "loss": 0.0994, + "step": 667000 + }, + { + "epoch": 193.37, + "learning_rate": 1.6584588644264196e-06, + "loss": 0.105, + "step": 667500 + }, + { + "epoch": 193.51, + "learning_rate": 1.6222479721900347e-06, + "loss": 0.1141, + "step": 668000 + }, + { + "epoch": 193.66, + "learning_rate": 1.5860370799536503e-06, + "loss": 0.1127, + "step": 668500 + }, + { + "epoch": 193.8, + "learning_rate": 1.5498261877172654e-06, + "loss": 0.1112, + "step": 669000 + }, + { + "epoch": 193.95, + "learning_rate": 1.5136152954808808e-06, + "loss": 0.1333, + "step": 669500 + }, + { + "epoch": 194.09, + "learning_rate": 1.477404403244496e-06, + "loss": 0.1027, + "step": 670000 + }, + { + "epoch": 194.24, + "learning_rate": 1.4411935110081113e-06, + "loss": 0.1204, + "step": 670500 + }, + { + "epoch": 194.38, + "learning_rate": 1.4049826187717266e-06, + "loss": 0.116, + "step": 671000 + }, + { + "epoch": 194.52, + "learning_rate": 1.3687717265353418e-06, + "loss": 0.1152, + "step": 671500 + }, + { + "epoch": 194.67, + "learning_rate": 1.3325608342989573e-06, + "loss": 0.1211, + "step": 672000 + }, + { + "epoch": 194.81, + "learning_rate": 1.2963499420625725e-06, + "loss": 0.103, + "step": 672500 + }, + { + "epoch": 194.96, + "learning_rate": 1.2601390498261878e-06, + "loss": 0.1091, + "step": 673000 + }, + { + "epoch": 195.1, + "learning_rate": 1.223928157589803e-06, + "loss": 0.1153, + "step": 673500 + }, + { + "epoch": 195.25, + "learning_rate": 1.1877172653534185e-06, + "loss": 0.1098, + "step": 674000 + }, + { + "epoch": 195.39, + "learning_rate": 1.1515063731170337e-06, + "loss": 0.1263, + "step": 674500 + }, + { + "epoch": 195.54, + "learning_rate": 1.115295480880649e-06, + "loss": 0.1176, + "step": 675000 + }, + { + "epoch": 195.68, + "learning_rate": 1.0790845886442642e-06, + "loss": 0.1088, + "step": 675500 + }, + { + "epoch": 195.83, + "learning_rate": 1.0428736964078795e-06, + "loss": 0.1062, + "step": 676000 + }, + { + "epoch": 195.97, + "learning_rate": 1.006662804171495e-06, + "loss": 0.1102, + "step": 676500 + }, + { + "epoch": 196.12, + "learning_rate": 9.7045191193511e-07, + "loss": 0.1305, + "step": 677000 + }, + { + "epoch": 196.26, + "learning_rate": 9.342410196987254e-07, + "loss": 0.1213, + "step": 677500 + }, + { + "epoch": 196.41, + "learning_rate": 8.980301274623407e-07, + "loss": 0.1173, + "step": 678000 + }, + { + "epoch": 196.55, + "learning_rate": 8.61819235225956e-07, + "loss": 0.1198, + "step": 678500 + }, + { + "epoch": 196.7, + "learning_rate": 8.256083429895712e-07, + "loss": 0.1099, + "step": 679000 + }, + { + "epoch": 196.84, + "learning_rate": 7.893974507531865e-07, + "loss": 0.1051, + "step": 679500 + }, + { + "epoch": 196.99, + "learning_rate": 7.531865585168019e-07, + "loss": 0.11, + "step": 680000 + } + ], + "max_steps": 690400, + "num_train_epochs": 200, + "total_flos": 3.310289191999795e+16, + "trial_name": null, + "trial_params": null +}