{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.929814973846515, "global_step": 980000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.4999999999999998e-06, "loss": 0.9292, "step": 500 }, { "epoch": 0.01, "learning_rate": 2.9999999999999997e-06, "loss": 0.7143, "step": 1000 }, { "epoch": 0.01, "eval_loss": 0.6849376559257507, "eval_runtime": 2.3913, "eval_samples_per_second": 960.554, "eval_steps_per_second": 15.054, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.499999999999999e-06, "loss": 0.6841, "step": 1500 }, { "epoch": 0.02, "learning_rate": 5.999999999999999e-06, "loss": 0.6817, "step": 2000 }, { "epoch": 0.02, "eval_loss": 0.678993284702301, "eval_runtime": 2.3276, "eval_samples_per_second": 986.872, "eval_steps_per_second": 15.467, "step": 2000 }, { "epoch": 0.03, "learning_rate": 7.499999999999999e-06, "loss": 0.6813, "step": 2500 }, { "epoch": 0.03, "learning_rate": 8.999999999999999e-06, "loss": 0.6806, "step": 3000 }, { "epoch": 0.03, "eval_loss": 0.6789333820343018, "eval_runtime": 2.3788, "eval_samples_per_second": 965.594, "eval_steps_per_second": 15.133, "step": 3000 }, { "epoch": 0.04, "learning_rate": 1.05e-05, "loss": 0.6806, "step": 3500 }, { "epoch": 0.04, "learning_rate": 1.1999999999999999e-05, "loss": 0.6804, "step": 4000 }, { "epoch": 0.04, "eval_loss": 0.6781470775604248, "eval_runtime": 2.452, "eval_samples_per_second": 936.772, "eval_steps_per_second": 14.682, "step": 4000 }, { "epoch": 0.05, "learning_rate": 1.3499999999999998e-05, "loss": 0.6799, "step": 4500 }, { "epoch": 0.06, "learning_rate": 1.4999999999999999e-05, "loss": 0.6797, "step": 5000 }, { "epoch": 0.06, "eval_loss": 0.6779675483703613, "eval_runtime": 2.3912, "eval_samples_per_second": 960.599, "eval_steps_per_second": 15.055, "step": 5000 }, { "epoch": 0.06, "learning_rate": 1.6499999999999998e-05, "loss": 0.6796, "step": 5500 }, { "epoch": 0.07, "learning_rate": 1.7999999999999997e-05, "loss": 0.6793, "step": 6000 }, { "epoch": 0.07, "eval_loss": 0.677739143371582, "eval_runtime": 2.3548, "eval_samples_per_second": 975.453, "eval_steps_per_second": 15.288, "step": 6000 }, { "epoch": 0.07, "learning_rate": 1.95e-05, "loss": 0.6791, "step": 6500 }, { "epoch": 0.08, "learning_rate": 2.1e-05, "loss": 0.6787, "step": 7000 }, { "epoch": 0.08, "eval_loss": 0.6776532530784607, "eval_runtime": 2.3673, "eval_samples_per_second": 970.284, "eval_steps_per_second": 15.207, "step": 7000 }, { "epoch": 0.08, "learning_rate": 2.2499999999999998e-05, "loss": 0.6784, "step": 7500 }, { "epoch": 0.09, "learning_rate": 2.3999999999999997e-05, "loss": 0.6775, "step": 8000 }, { "epoch": 0.09, "eval_loss": 0.6731997728347778, "eval_runtime": 2.3959, "eval_samples_per_second": 958.715, "eval_steps_per_second": 15.026, "step": 8000 }, { "epoch": 0.09, "learning_rate": 2.55e-05, "loss": 0.6689, "step": 8500 }, { "epoch": 0.1, "learning_rate": 2.6999999999999996e-05, "loss": 0.6553, "step": 9000 }, { "epoch": 0.1, "eval_loss": 0.645298957824707, "eval_runtime": 2.3807, "eval_samples_per_second": 964.84, "eval_steps_per_second": 15.122, "step": 9000 }, { "epoch": 0.11, "learning_rate": 2.8499999999999998e-05, "loss": 0.6445, "step": 9500 }, { "epoch": 0.11, "learning_rate": 2.9999999999999997e-05, "loss": 0.6351, "step": 10000 }, { "epoch": 0.11, "eval_loss": 0.6254131197929382, "eval_runtime": 2.3999, "eval_samples_per_second": 957.11, "eval_steps_per_second": 15.0, "step": 10000 }, { "epoch": 0.12, "learning_rate": 3.149999999999999e-05, "loss": 0.6293, "step": 10500 }, { "epoch": 0.12, "learning_rate": 3.2999999999999996e-05, "loss": 0.6245, "step": 11000 }, { "epoch": 0.12, "eval_loss": 0.6135886311531067, "eval_runtime": 2.3072, "eval_samples_per_second": 995.595, "eval_steps_per_second": 15.604, "step": 11000 }, { "epoch": 0.13, "learning_rate": 3.45e-05, "loss": 0.6197, "step": 11500 }, { "epoch": 0.13, "learning_rate": 3.5999999999999994e-05, "loss": 0.6148, "step": 12000 }, { "epoch": 0.13, "eval_loss": 0.6036174297332764, "eval_runtime": 2.3862, "eval_samples_per_second": 962.612, "eval_steps_per_second": 15.087, "step": 12000 }, { "epoch": 0.14, "learning_rate": 3.75e-05, "loss": 0.6096, "step": 12500 }, { "epoch": 0.14, "learning_rate": 3.9e-05, "loss": 0.6046, "step": 13000 }, { "epoch": 0.14, "eval_loss": 0.5924868583679199, "eval_runtime": 2.386, "eval_samples_per_second": 962.685, "eval_steps_per_second": 15.088, "step": 13000 }, { "epoch": 0.15, "learning_rate": 4.05e-05, "loss": 0.5998, "step": 13500 }, { "epoch": 0.16, "learning_rate": 4.2e-05, "loss": 0.5951, "step": 14000 }, { "epoch": 0.16, "eval_loss": 0.5810989141464233, "eval_runtime": 2.4081, "eval_samples_per_second": 953.875, "eval_steps_per_second": 14.95, "step": 14000 }, { "epoch": 0.16, "learning_rate": 4.3499999999999993e-05, "loss": 0.5908, "step": 14500 }, { "epoch": 0.17, "learning_rate": 4.4999999999999996e-05, "loss": 0.586, "step": 15000 }, { "epoch": 0.17, "eval_loss": 0.5703989863395691, "eval_runtime": 2.3852, "eval_samples_per_second": 963.035, "eval_steps_per_second": 15.093, "step": 15000 }, { "epoch": 0.17, "learning_rate": 4.65e-05, "loss": 0.5815, "step": 15500 }, { "epoch": 0.18, "learning_rate": 4.7999999999999994e-05, "loss": 0.5769, "step": 16000 }, { "epoch": 0.18, "eval_loss": 0.5616013407707214, "eval_runtime": 2.3872, "eval_samples_per_second": 962.217, "eval_steps_per_second": 15.08, "step": 16000 }, { "epoch": 0.18, "learning_rate": 4.95e-05, "loss": 0.5722, "step": 16500 }, { "epoch": 0.19, "learning_rate": 5.1e-05, "loss": 0.5673, "step": 17000 }, { "epoch": 0.19, "eval_loss": 0.5503653287887573, "eval_runtime": 2.3451, "eval_samples_per_second": 979.501, "eval_steps_per_second": 15.351, "step": 17000 }, { "epoch": 0.2, "learning_rate": 5.2499999999999995e-05, "loss": 0.5616, "step": 17500 }, { "epoch": 0.2, "learning_rate": 5.399999999999999e-05, "loss": 0.5553, "step": 18000 }, { "epoch": 0.2, "eval_loss": 0.5397240519523621, "eval_runtime": 2.4358, "eval_samples_per_second": 943.009, "eval_steps_per_second": 14.779, "step": 18000 }, { "epoch": 0.21, "learning_rate": 5.5499999999999994e-05, "loss": 0.5491, "step": 18500 }, { "epoch": 0.21, "learning_rate": 5.6999999999999996e-05, "loss": 0.5421, "step": 19000 }, { "epoch": 0.21, "eval_loss": 0.5266169905662537, "eval_runtime": 2.4081, "eval_samples_per_second": 953.853, "eval_steps_per_second": 14.949, "step": 19000 }, { "epoch": 0.22, "learning_rate": 5.85e-05, "loss": 0.5357, "step": 19500 }, { "epoch": 0.22, "learning_rate": 5.9999999999999995e-05, "loss": 0.5301, "step": 20000 }, { "epoch": 0.22, "eval_loss": 0.5150080919265747, "eval_runtime": 2.3725, "eval_samples_per_second": 968.19, "eval_steps_per_second": 15.174, "step": 20000 }, { "epoch": 0.23, "learning_rate": 6.149999999999999e-05, "loss": 0.5248, "step": 20500 }, { "epoch": 0.23, "learning_rate": 6.299999999999999e-05, "loss": 0.5203, "step": 21000 }, { "epoch": 0.23, "eval_loss": 0.5055692195892334, "eval_runtime": 2.4443, "eval_samples_per_second": 939.719, "eval_steps_per_second": 14.728, "step": 21000 }, { "epoch": 0.24, "learning_rate": 6.45e-05, "loss": 0.5151, "step": 21500 }, { "epoch": 0.25, "learning_rate": 6.599999999999999e-05, "loss": 0.5105, "step": 22000 }, { "epoch": 0.25, "eval_loss": 0.4966994822025299, "eval_runtime": 2.3756, "eval_samples_per_second": 966.927, "eval_steps_per_second": 15.154, "step": 22000 }, { "epoch": 0.25, "learning_rate": 6.75e-05, "loss": 0.5062, "step": 22500 }, { "epoch": 0.26, "learning_rate": 6.9e-05, "loss": 0.5023, "step": 23000 }, { "epoch": 0.26, "eval_loss": 0.4855397045612335, "eval_runtime": 2.4105, "eval_samples_per_second": 952.91, "eval_steps_per_second": 14.935, "step": 23000 }, { "epoch": 0.26, "learning_rate": 7.049999999999999e-05, "loss": 0.4981, "step": 23500 }, { "epoch": 0.27, "learning_rate": 7.199999999999999e-05, "loss": 0.4951, "step": 24000 }, { "epoch": 0.27, "eval_loss": 0.4793773889541626, "eval_runtime": 2.3882, "eval_samples_per_second": 961.816, "eval_steps_per_second": 15.074, "step": 24000 }, { "epoch": 0.27, "learning_rate": 7.35e-05, "loss": 0.4904, "step": 24500 }, { "epoch": 0.28, "learning_rate": 7.5e-05, "loss": 0.4874, "step": 25000 }, { "epoch": 0.28, "eval_loss": 0.4764867126941681, "eval_runtime": 2.4085, "eval_samples_per_second": 953.713, "eval_steps_per_second": 14.947, "step": 25000 }, { "epoch": 0.28, "learning_rate": 7.649999999999999e-05, "loss": 0.4837, "step": 25500 }, { "epoch": 0.29, "learning_rate": 7.8e-05, "loss": 0.4803, "step": 26000 }, { "epoch": 0.29, "eval_loss": 0.46588340401649475, "eval_runtime": 2.4213, "eval_samples_per_second": 948.654, "eval_steps_per_second": 14.868, "step": 26000 }, { "epoch": 0.3, "learning_rate": 7.95e-05, "loss": 0.4768, "step": 26500 }, { "epoch": 0.3, "learning_rate": 8.1e-05, "loss": 0.4731, "step": 27000 }, { "epoch": 0.3, "eval_loss": 0.4568893611431122, "eval_runtime": 2.416, "eval_samples_per_second": 950.734, "eval_steps_per_second": 14.9, "step": 27000 }, { "epoch": 0.31, "learning_rate": 8.25e-05, "loss": 0.4701, "step": 27500 }, { "epoch": 0.31, "learning_rate": 8.4e-05, "loss": 0.4673, "step": 28000 }, { "epoch": 0.31, "eval_loss": 0.45133140683174133, "eval_runtime": 2.3585, "eval_samples_per_second": 973.912, "eval_steps_per_second": 15.264, "step": 28000 }, { "epoch": 0.32, "learning_rate": 8.549999999999999e-05, "loss": 0.4637, "step": 28500 }, { "epoch": 0.32, "learning_rate": 8.699999999999999e-05, "loss": 0.4607, "step": 29000 }, { "epoch": 0.32, "eval_loss": 0.44702839851379395, "eval_runtime": 2.4192, "eval_samples_per_second": 949.488, "eval_steps_per_second": 14.881, "step": 29000 }, { "epoch": 0.33, "learning_rate": 8.849999999999998e-05, "loss": 0.4579, "step": 29500 }, { "epoch": 0.33, "learning_rate": 8.999999999999999e-05, "loss": 0.4542, "step": 30000 }, { "epoch": 0.33, "eval_loss": 0.4402887225151062, "eval_runtime": 2.4131, "eval_samples_per_second": 951.902, "eval_steps_per_second": 14.919, "step": 30000 }, { "epoch": 0.34, "learning_rate": 9.149999999999999e-05, "loss": 0.4509, "step": 30500 }, { "epoch": 0.35, "learning_rate": 9.3e-05, "loss": 0.4477, "step": 31000 }, { "epoch": 0.35, "eval_loss": 0.4341259002685547, "eval_runtime": 2.3655, "eval_samples_per_second": 971.032, "eval_steps_per_second": 15.219, "step": 31000 }, { "epoch": 0.35, "learning_rate": 9.449999999999999e-05, "loss": 0.4443, "step": 31500 }, { "epoch": 0.36, "learning_rate": 9.599999999999999e-05, "loss": 0.4413, "step": 32000 }, { "epoch": 0.36, "eval_loss": 0.42718443274497986, "eval_runtime": 2.4008, "eval_samples_per_second": 956.762, "eval_steps_per_second": 14.995, "step": 32000 }, { "epoch": 0.36, "learning_rate": 9.75e-05, "loss": 0.4376, "step": 32500 }, { "epoch": 0.37, "learning_rate": 9.9e-05, "loss": 0.4341, "step": 33000 }, { "epoch": 0.37, "eval_loss": 0.41980886459350586, "eval_runtime": 2.3982, "eval_samples_per_second": 957.813, "eval_steps_per_second": 15.011, "step": 33000 }, { "epoch": 0.37, "learning_rate": 0.0001005, "loss": 0.4312, "step": 33500 }, { "epoch": 0.38, "learning_rate": 0.000102, "loss": 0.4289, "step": 34000 }, { "epoch": 0.38, "eval_loss": 0.4122560918331146, "eval_runtime": 2.425, "eval_samples_per_second": 947.221, "eval_steps_per_second": 14.845, "step": 34000 }, { "epoch": 0.38, "learning_rate": 0.00010349999999999998, "loss": 0.4257, "step": 34500 }, { "epoch": 0.39, "learning_rate": 0.00010499999999999999, "loss": 0.4224, "step": 35000 }, { "epoch": 0.39, "eval_loss": 0.40835943818092346, "eval_runtime": 2.4071, "eval_samples_per_second": 954.265, "eval_steps_per_second": 14.956, "step": 35000 }, { "epoch": 0.4, "learning_rate": 0.00010649999999999999, "loss": 0.4202, "step": 35500 }, { "epoch": 0.4, "learning_rate": 0.00010799999999999998, "loss": 0.4173, "step": 36000 }, { "epoch": 0.4, "eval_loss": 0.40327221155166626, "eval_runtime": 2.3753, "eval_samples_per_second": 967.051, "eval_steps_per_second": 15.156, "step": 36000 }, { "epoch": 0.41, "learning_rate": 0.00010949999999999999, "loss": 0.4142, "step": 36500 }, { "epoch": 0.41, "learning_rate": 0.00011099999999999999, "loss": 0.412, "step": 37000 }, { "epoch": 0.41, "eval_loss": 0.39642444252967834, "eval_runtime": 2.4394, "eval_samples_per_second": 941.616, "eval_steps_per_second": 14.758, "step": 37000 }, { "epoch": 0.42, "learning_rate": 0.0001125, "loss": 0.4098, "step": 37500 }, { "epoch": 0.42, "learning_rate": 0.00011399999999999999, "loss": 0.407, "step": 38000 }, { "epoch": 0.42, "eval_loss": 0.39146095514297485, "eval_runtime": 2.3792, "eval_samples_per_second": 965.449, "eval_steps_per_second": 15.131, "step": 38000 }, { "epoch": 0.43, "learning_rate": 0.00011549999999999999, "loss": 0.4033, "step": 38500 }, { "epoch": 0.43, "learning_rate": 0.000117, "loss": 0.402, "step": 39000 }, { "epoch": 0.43, "eval_loss": 0.3854062259197235, "eval_runtime": 2.4437, "eval_samples_per_second": 939.967, "eval_steps_per_second": 14.732, "step": 39000 }, { "epoch": 0.44, "learning_rate": 0.0001185, "loss": 0.3991, "step": 39500 }, { "epoch": 0.45, "learning_rate": 0.00011999999999999999, "loss": 0.3966, "step": 40000 }, { "epoch": 0.45, "eval_loss": 0.3808075189590454, "eval_runtime": 2.4355, "eval_samples_per_second": 943.135, "eval_steps_per_second": 14.781, "step": 40000 }, { "epoch": 0.45, "learning_rate": 0.0001215, "loss": 0.3943, "step": 40500 }, { "epoch": 0.46, "learning_rate": 0.00012299999999999998, "loss": 0.3929, "step": 41000 }, { "epoch": 0.46, "eval_loss": 0.37926527857780457, "eval_runtime": 2.3557, "eval_samples_per_second": 975.086, "eval_steps_per_second": 15.282, "step": 41000 }, { "epoch": 0.46, "learning_rate": 0.0001245, "loss": 0.39, "step": 41500 }, { "epoch": 0.47, "learning_rate": 0.00012599999999999997, "loss": 0.3873, "step": 42000 }, { "epoch": 0.47, "eval_loss": 0.37127774953842163, "eval_runtime": 2.3664, "eval_samples_per_second": 970.675, "eval_steps_per_second": 15.213, "step": 42000 }, { "epoch": 0.47, "learning_rate": 0.00012749999999999998, "loss": 0.3861, "step": 42500 }, { "epoch": 0.48, "learning_rate": 0.000129, "loss": 0.3837, "step": 43000 }, { "epoch": 0.48, "eval_loss": 0.36950594186782837, "eval_runtime": 2.4197, "eval_samples_per_second": 949.3, "eval_steps_per_second": 14.878, "step": 43000 }, { "epoch": 0.49, "learning_rate": 0.0001305, "loss": 0.3812, "step": 43500 }, { "epoch": 0.49, "learning_rate": 0.00013199999999999998, "loss": 0.3793, "step": 44000 }, { "epoch": 0.49, "eval_loss": 0.3651977479457855, "eval_runtime": 2.3939, "eval_samples_per_second": 959.528, "eval_steps_per_second": 15.038, "step": 44000 }, { "epoch": 0.5, "learning_rate": 0.0001335, "loss": 0.3775, "step": 44500 }, { "epoch": 0.5, "learning_rate": 0.000135, "loss": 0.3756, "step": 45000 }, { "epoch": 0.5, "eval_loss": 0.3592735230922699, "eval_runtime": 2.3855, "eval_samples_per_second": 962.901, "eval_steps_per_second": 15.091, "step": 45000 }, { "epoch": 0.51, "learning_rate": 0.00013649999999999998, "loss": 0.3737, "step": 45500 }, { "epoch": 0.51, "learning_rate": 0.000138, "loss": 0.3718, "step": 46000 }, { "epoch": 0.51, "eval_loss": 0.3585481643676758, "eval_runtime": 2.3854, "eval_samples_per_second": 962.952, "eval_steps_per_second": 15.092, "step": 46000 }, { "epoch": 0.52, "learning_rate": 0.0001395, "loss": 0.3704, "step": 46500 }, { "epoch": 0.52, "learning_rate": 0.00014099999999999998, "loss": 0.3687, "step": 47000 }, { "epoch": 0.52, "eval_loss": 0.3562163710594177, "eval_runtime": 2.4137, "eval_samples_per_second": 951.637, "eval_steps_per_second": 14.915, "step": 47000 }, { "epoch": 0.53, "learning_rate": 0.0001425, "loss": 0.367, "step": 47500 }, { "epoch": 0.54, "learning_rate": 0.00014399999999999998, "loss": 0.3654, "step": 48000 }, { "epoch": 0.54, "eval_loss": 0.35154005885124207, "eval_runtime": 2.4671, "eval_samples_per_second": 931.04, "eval_steps_per_second": 14.592, "step": 48000 }, { "epoch": 0.54, "learning_rate": 0.00014549999999999999, "loss": 0.3638, "step": 48500 }, { "epoch": 0.55, "learning_rate": 0.000147, "loss": 0.3625, "step": 49000 }, { "epoch": 0.55, "eval_loss": 0.3474389910697937, "eval_runtime": 2.4177, "eval_samples_per_second": 950.09, "eval_steps_per_second": 14.89, "step": 49000 }, { "epoch": 0.55, "learning_rate": 0.00014849999999999998, "loss": 0.3612, "step": 49500 }, { "epoch": 0.56, "learning_rate": 0.00015, "loss": 0.3592, "step": 50000 }, { "epoch": 0.56, "eval_loss": 0.3449079096317291, "eval_runtime": 2.4174, "eval_samples_per_second": 950.212, "eval_steps_per_second": 14.892, "step": 50000 }, { "epoch": 0.56, "learning_rate": 0.00014999990431133645, "loss": 0.3579, "step": 50500 }, { "epoch": 0.57, "learning_rate": 0.0001499996172456075, "loss": 0.3563, "step": 51000 }, { "epoch": 0.57, "eval_loss": 0.3443795442581177, "eval_runtime": 2.3466, "eval_samples_per_second": 978.854, "eval_steps_per_second": 15.341, "step": 51000 }, { "epoch": 0.57, "learning_rate": 0.00014999913880359787, "loss": 0.3547, "step": 51500 }, { "epoch": 0.58, "learning_rate": 0.00014999846898661572, "loss": 0.353, "step": 52000 }, { "epoch": 0.58, "eval_loss": 0.34043800830841064, "eval_runtime": 2.4046, "eval_samples_per_second": 955.259, "eval_steps_per_second": 14.971, "step": 52000 }, { "epoch": 0.59, "learning_rate": 0.00014999760779649222, "loss": 0.3516, "step": 52500 }, { "epoch": 0.59, "learning_rate": 0.00014999655523558183, "loss": 0.3502, "step": 53000 }, { "epoch": 0.59, "eval_loss": 0.3358021378517151, "eval_runtime": 2.3861, "eval_samples_per_second": 962.643, "eval_steps_per_second": 15.087, "step": 53000 }, { "epoch": 0.6, "learning_rate": 0.00014999531130676229, "loss": 0.3491, "step": 53500 }, { "epoch": 0.6, "learning_rate": 0.00014999387601343436, "loss": 0.3473, "step": 54000 }, { "epoch": 0.6, "eval_loss": 0.3330630362033844, "eval_runtime": 2.3348, "eval_samples_per_second": 983.811, "eval_steps_per_second": 15.419, "step": 54000 }, { "epoch": 0.61, "learning_rate": 0.00014999224935952215, "loss": 0.3463, "step": 54500 }, { "epoch": 0.61, "learning_rate": 0.00014999043134947282, "loss": 0.3445, "step": 55000 }, { "epoch": 0.61, "eval_loss": 0.33030495047569275, "eval_runtime": 2.318, "eval_samples_per_second": 990.921, "eval_steps_per_second": 15.53, "step": 55000 }, { "epoch": 0.62, "learning_rate": 0.00014998842198825674, "loss": 0.3434, "step": 55500 }, { "epoch": 0.62, "learning_rate": 0.00014998622128136748, "loss": 0.342, "step": 56000 }, { "epoch": 0.62, "eval_loss": 0.3295433819293976, "eval_runtime": 2.3042, "eval_samples_per_second": 996.891, "eval_steps_per_second": 15.624, "step": 56000 }, { "epoch": 0.63, "learning_rate": 0.00014998382923482164, "loss": 0.3411, "step": 56500 }, { "epoch": 0.64, "learning_rate": 0.000149981245855159, "loss": 0.3396, "step": 57000 }, { "epoch": 0.64, "eval_loss": 0.32805606722831726, "eval_runtime": 2.4137, "eval_samples_per_second": 951.659, "eval_steps_per_second": 14.915, "step": 57000 }, { "epoch": 0.64, "learning_rate": 0.00014997847114944242, "loss": 0.3383, "step": 57500 }, { "epoch": 0.65, "learning_rate": 0.00014997550512525784, "loss": 0.3374, "step": 58000 }, { "epoch": 0.65, "eval_loss": 0.3256905674934387, "eval_runtime": 2.4211, "eval_samples_per_second": 948.748, "eval_steps_per_second": 14.869, "step": 58000 }, { "epoch": 0.65, "learning_rate": 0.00014997234779071426, "loss": 0.3365, "step": 58500 }, { "epoch": 0.66, "learning_rate": 0.0001499689991544437, "loss": 0.3353, "step": 59000 }, { "epoch": 0.66, "eval_loss": 0.32548126578330994, "eval_runtime": 2.4309, "eval_samples_per_second": 944.932, "eval_steps_per_second": 14.81, "step": 59000 }, { "epoch": 0.66, "learning_rate": 0.0001499654592256012, "loss": 0.3342, "step": 59500 }, { "epoch": 0.67, "learning_rate": 0.00014996172801386482, "loss": 0.3333, "step": 60000 }, { "epoch": 0.67, "eval_loss": 0.3190021514892578, "eval_runtime": 2.4381, "eval_samples_per_second": 942.115, "eval_steps_per_second": 14.765, "step": 60000 }, { "epoch": 0.67, "learning_rate": 0.00014995780552943551, "loss": 0.3321, "step": 60500 }, { "epoch": 0.68, "learning_rate": 0.00014995369178303722, "loss": 0.3311, "step": 61000 }, { "epoch": 0.68, "eval_loss": 0.3181557357311249, "eval_runtime": 2.3632, "eval_samples_per_second": 971.967, "eval_steps_per_second": 15.233, "step": 61000 }, { "epoch": 0.69, "learning_rate": 0.0001499493867859168, "loss": 0.3298, "step": 61500 }, { "epoch": 0.69, "learning_rate": 0.0001499448905498439, "loss": 0.3289, "step": 62000 }, { "epoch": 0.69, "eval_loss": 0.31774210929870605, "eval_runtime": 2.4377, "eval_samples_per_second": 942.269, "eval_steps_per_second": 14.768, "step": 62000 }, { "epoch": 0.7, "learning_rate": 0.00014994020308711106, "loss": 0.3281, "step": 62500 }, { "epoch": 0.7, "learning_rate": 0.00014993532441053364, "loss": 0.3272, "step": 63000 }, { "epoch": 0.7, "eval_loss": 0.31380537152290344, "eval_runtime": 2.4068, "eval_samples_per_second": 954.378, "eval_steps_per_second": 14.958, "step": 63000 }, { "epoch": 0.71, "learning_rate": 0.0001499302545334498, "loss": 0.3262, "step": 63500 }, { "epoch": 0.71, "learning_rate": 0.0001499249934697203, "loss": 0.3253, "step": 64000 }, { "epoch": 0.71, "eval_loss": 0.3134210705757141, "eval_runtime": 2.4456, "eval_samples_per_second": 939.234, "eval_steps_per_second": 14.72, "step": 64000 }, { "epoch": 0.72, "learning_rate": 0.00014991954123372875, "loss": 0.3246, "step": 64500 }, { "epoch": 0.72, "learning_rate": 0.0001499138978403813, "loss": 0.3242, "step": 65000 }, { "epoch": 0.72, "eval_loss": 0.3107437193393707, "eval_runtime": 2.3823, "eval_samples_per_second": 964.183, "eval_steps_per_second": 15.111, "step": 65000 }, { "epoch": 0.73, "learning_rate": 0.00014990806330510687, "loss": 0.3231, "step": 65500 }, { "epoch": 0.74, "learning_rate": 0.00014990203764385677, "loss": 0.3221, "step": 66000 }, { "epoch": 0.74, "eval_loss": 0.308339387178421, "eval_runtime": 2.4467, "eval_samples_per_second": 938.797, "eval_steps_per_second": 14.713, "step": 66000 }, { "epoch": 0.74, "learning_rate": 0.00014989582087310494, "loss": 0.3211, "step": 66500 }, { "epoch": 0.75, "learning_rate": 0.00014988941300984784, "loss": 0.3203, "step": 67000 }, { "epoch": 0.75, "eval_loss": 0.3079957365989685, "eval_runtime": 2.3779, "eval_samples_per_second": 965.976, "eval_steps_per_second": 15.139, "step": 67000 }, { "epoch": 0.75, "learning_rate": 0.00014988281407160426, "loss": 0.3194, "step": 67500 }, { "epoch": 0.76, "learning_rate": 0.0001498760240764155, "loss": 0.3188, "step": 68000 }, { "epoch": 0.76, "eval_loss": 0.30548328161239624, "eval_runtime": 2.3902, "eval_samples_per_second": 961.017, "eval_steps_per_second": 15.062, "step": 68000 }, { "epoch": 0.76, "learning_rate": 0.00014986904304284512, "loss": 0.3181, "step": 68500 }, { "epoch": 0.77, "learning_rate": 0.000149861870989979, "loss": 0.3169, "step": 69000 }, { "epoch": 0.77, "eval_loss": 0.3034500181674957, "eval_runtime": 2.3995, "eval_samples_per_second": 957.293, "eval_steps_per_second": 15.003, "step": 69000 }, { "epoch": 0.78, "learning_rate": 0.00014985450793742527, "loss": 0.3164, "step": 69500 }, { "epoch": 0.78, "learning_rate": 0.0001498469539053142, "loss": 0.3157, "step": 70000 }, { "epoch": 0.78, "eval_loss": 0.3020155429840088, "eval_runtime": 2.3865, "eval_samples_per_second": 962.488, "eval_steps_per_second": 15.085, "step": 70000 }, { "epoch": 0.79, "learning_rate": 0.00014983920891429827, "loss": 0.3158, "step": 70500 }, { "epoch": 0.79, "learning_rate": 0.00014983127298555198, "loss": 0.3143, "step": 71000 }, { "epoch": 0.79, "eval_loss": 0.3028620779514313, "eval_runtime": 2.4295, "eval_samples_per_second": 945.481, "eval_steps_per_second": 14.818, "step": 71000 }, { "epoch": 0.8, "learning_rate": 0.00014982314614077184, "loss": 0.3136, "step": 71500 }, { "epoch": 0.8, "learning_rate": 0.00014981482840217632, "loss": 0.3136, "step": 72000 }, { "epoch": 0.8, "eval_loss": 0.2998126149177551, "eval_runtime": 2.3756, "eval_samples_per_second": 966.92, "eval_steps_per_second": 15.154, "step": 72000 }, { "epoch": 0.81, "learning_rate": 0.00014980631979250587, "loss": 0.3125, "step": 72500 }, { "epoch": 0.81, "learning_rate": 0.00014979762033502262, "loss": 0.3117, "step": 73000 }, { "epoch": 0.81, "eval_loss": 0.2993732988834381, "eval_runtime": 2.3811, "eval_samples_per_second": 964.682, "eval_steps_per_second": 15.119, "step": 73000 }, { "epoch": 0.82, "learning_rate": 0.0001497887300535106, "loss": 0.3113, "step": 73500 }, { "epoch": 0.83, "learning_rate": 0.00014977964897227547, "loss": 0.3104, "step": 74000 }, { "epoch": 0.83, "eval_loss": 0.2992545962333679, "eval_runtime": 2.3838, "eval_samples_per_second": 963.585, "eval_steps_per_second": 15.102, "step": 74000 }, { "epoch": 0.83, "learning_rate": 0.0001497703771161446, "loss": 0.3096, "step": 74500 }, { "epoch": 0.84, "learning_rate": 0.00014976091451046687, "loss": 0.3094, "step": 75000 }, { "epoch": 0.84, "eval_loss": 0.2973059415817261, "eval_runtime": 2.4216, "eval_samples_per_second": 948.528, "eval_steps_per_second": 14.866, "step": 75000 }, { "epoch": 0.84, "learning_rate": 0.00014975126118111268, "loss": 0.3086, "step": 75500 }, { "epoch": 0.85, "learning_rate": 0.00014974141715447386, "loss": 0.3075, "step": 76000 }, { "epoch": 0.85, "eval_loss": 0.2966085374355316, "eval_runtime": 2.4258, "eval_samples_per_second": 946.915, "eval_steps_per_second": 14.841, "step": 76000 }, { "epoch": 0.85, "learning_rate": 0.00014973138245746363, "loss": 0.3078, "step": 76500 }, { "epoch": 0.86, "learning_rate": 0.00014972115711751644, "loss": 0.3062, "step": 77000 }, { "epoch": 0.86, "eval_loss": 0.29561737179756165, "eval_runtime": 2.3702, "eval_samples_per_second": 969.126, "eval_steps_per_second": 15.189, "step": 77000 }, { "epoch": 0.86, "learning_rate": 0.00014971074116258796, "loss": 0.3063, "step": 77500 }, { "epoch": 0.87, "learning_rate": 0.00014970013462115505, "loss": 0.3054, "step": 78000 }, { "epoch": 0.87, "eval_loss": 0.29380762577056885, "eval_runtime": 2.3743, "eval_samples_per_second": 967.43, "eval_steps_per_second": 15.162, "step": 78000 }, { "epoch": 0.88, "learning_rate": 0.00014968933752221558, "loss": 0.3051, "step": 78500 }, { "epoch": 0.88, "learning_rate": 0.00014967834989528843, "loss": 0.3044, "step": 79000 }, { "epoch": 0.88, "eval_loss": 0.29091179370880127, "eval_runtime": 2.4306, "eval_samples_per_second": 945.023, "eval_steps_per_second": 14.811, "step": 79000 }, { "epoch": 0.89, "learning_rate": 0.0001496671717704133, "loss": 0.3039, "step": 79500 }, { "epoch": 0.89, "learning_rate": 0.00014965580317815078, "loss": 0.3031, "step": 80000 }, { "epoch": 0.89, "eval_loss": 0.2893276512622833, "eval_runtime": 2.3942, "eval_samples_per_second": 959.413, "eval_steps_per_second": 15.037, "step": 80000 }, { "epoch": 0.9, "learning_rate": 0.0001496442441495822, "loss": 0.303, "step": 80500 }, { "epoch": 0.9, "learning_rate": 0.00014963249471630944, "loss": 0.3024, "step": 81000 }, { "epoch": 0.9, "eval_loss": 0.2880454361438751, "eval_runtime": 2.3598, "eval_samples_per_second": 973.408, "eval_steps_per_second": 15.256, "step": 81000 }, { "epoch": 0.91, "learning_rate": 0.00014962055491045506, "loss": 0.3019, "step": 81500 }, { "epoch": 0.91, "learning_rate": 0.000149608424764662, "loss": 0.3016, "step": 82000 }, { "epoch": 0.91, "eval_loss": 0.2889193892478943, "eval_runtime": 2.392, "eval_samples_per_second": 960.275, "eval_steps_per_second": 15.05, "step": 82000 }, { "epoch": 0.92, "learning_rate": 0.00014959610431209363, "loss": 0.3008, "step": 82500 }, { "epoch": 0.93, "learning_rate": 0.0001495835935864336, "loss": 0.3001, "step": 83000 }, { "epoch": 0.93, "eval_loss": 0.2879210412502289, "eval_runtime": 2.3362, "eval_samples_per_second": 983.21, "eval_steps_per_second": 15.409, "step": 83000 }, { "epoch": 0.93, "learning_rate": 0.00014957089262188571, "loss": 0.3001, "step": 83500 }, { "epoch": 0.94, "learning_rate": 0.00014955800145317397, "loss": 0.2994, "step": 84000 }, { "epoch": 0.94, "eval_loss": 0.28926190733909607, "eval_runtime": 2.354, "eval_samples_per_second": 975.772, "eval_steps_per_second": 15.293, "step": 84000 }, { "epoch": 0.94, "learning_rate": 0.00014954492011554234, "loss": 0.2992, "step": 84500 }, { "epoch": 0.95, "learning_rate": 0.00014953164864475466, "loss": 0.2986, "step": 85000 }, { "epoch": 0.95, "eval_loss": 0.28680220246315, "eval_runtime": 2.4523, "eval_samples_per_second": 936.685, "eval_steps_per_second": 14.68, "step": 85000 }, { "epoch": 0.95, "learning_rate": 0.00014951818707709463, "loss": 0.2981, "step": 85500 }, { "epoch": 0.96, "learning_rate": 0.0001495045354493657, "loss": 0.297, "step": 86000 }, { "epoch": 0.96, "eval_loss": 0.28654542565345764, "eval_runtime": 2.3431, "eval_samples_per_second": 980.338, "eval_steps_per_second": 15.364, "step": 86000 }, { "epoch": 0.96, "learning_rate": 0.00014949069379889088, "loss": 0.2972, "step": 86500 }, { "epoch": 0.97, "learning_rate": 0.00014947666216351272, "loss": 0.2965, "step": 87000 }, { "epoch": 0.97, "eval_loss": 0.2838832139968872, "eval_runtime": 2.3161, "eval_samples_per_second": 991.759, "eval_steps_per_second": 15.543, "step": 87000 }, { "epoch": 0.98, "learning_rate": 0.00014946244058159313, "loss": 0.2961, "step": 87500 }, { "epoch": 0.98, "learning_rate": 0.00014944802909201344, "loss": 0.2951, "step": 88000 }, { "epoch": 0.98, "eval_loss": 0.28492867946624756, "eval_runtime": 2.2962, "eval_samples_per_second": 1000.341, "eval_steps_per_second": 15.678, "step": 88000 }, { "epoch": 0.99, "learning_rate": 0.00014943342773417407, "loss": 0.2954, "step": 88500 }, { "epoch": 0.99, "learning_rate": 0.00014941863654799456, "loss": 0.2947, "step": 89000 }, { "epoch": 0.99, "eval_loss": 0.28069427609443665, "eval_runtime": 2.3864, "eval_samples_per_second": 962.549, "eval_steps_per_second": 15.086, "step": 89000 }, { "epoch": 1.0, "learning_rate": 0.0001494036555739135, "loss": 0.2943, "step": 89500 }, { "epoch": 1.0, "learning_rate": 0.00014938848485288825, "loss": 0.2943, "step": 90000 }, { "epoch": 1.0, "eval_loss": 0.2816523611545563, "eval_runtime": 2.5066, "eval_samples_per_second": 916.368, "eval_steps_per_second": 14.362, "step": 90000 }, { "epoch": 1.01, "learning_rate": 0.000149373124426395, "loss": 0.2933, "step": 90500 }, { "epoch": 1.01, "learning_rate": 0.0001493575743364286, "loss": 0.2934, "step": 91000 }, { "epoch": 1.01, "eval_loss": 0.2812552750110626, "eval_runtime": 2.6367, "eval_samples_per_second": 871.176, "eval_steps_per_second": 13.654, "step": 91000 }, { "epoch": 1.02, "learning_rate": 0.00014934183462550238, "loss": 0.2926, "step": 91500 }, { "epoch": 1.03, "learning_rate": 0.00014932590533664808, "loss": 0.2923, "step": 92000 }, { "epoch": 1.03, "eval_loss": 0.2808503210544586, "eval_runtime": 2.4894, "eval_samples_per_second": 922.7, "eval_steps_per_second": 14.461, "step": 92000 }, { "epoch": 1.03, "learning_rate": 0.00014930978651341581, "loss": 0.292, "step": 92500 }, { "epoch": 1.04, "learning_rate": 0.0001492934781998738, "loss": 0.2918, "step": 93000 }, { "epoch": 1.04, "eval_loss": 0.2790589928627014, "eval_runtime": 2.5204, "eval_samples_per_second": 911.355, "eval_steps_per_second": 14.283, "step": 93000 }, { "epoch": 1.04, "learning_rate": 0.00014927698044060834, "loss": 0.291, "step": 93500 }, { "epoch": 1.05, "learning_rate": 0.0001492602932807237, "loss": 0.2911, "step": 94000 }, { "epoch": 1.05, "eval_loss": 0.2791271209716797, "eval_runtime": 2.5437, "eval_samples_per_second": 903.029, "eval_steps_per_second": 14.153, "step": 94000 }, { "epoch": 1.05, "learning_rate": 0.00014924341676584194, "loss": 0.29, "step": 94500 }, { "epoch": 1.06, "learning_rate": 0.00014922635094210277, "loss": 0.2904, "step": 95000 }, { "epoch": 1.06, "eval_loss": 0.2779350280761719, "eval_runtime": 2.6041, "eval_samples_per_second": 882.055, "eval_steps_per_second": 13.824, "step": 95000 }, { "epoch": 1.07, "learning_rate": 0.00014920909585616356, "loss": 0.2893, "step": 95500 }, { "epoch": 1.07, "learning_rate": 0.000149191651555199, "loss": 0.2897, "step": 96000 }, { "epoch": 1.07, "eval_loss": 0.278182715177536, "eval_runtime": 2.5817, "eval_samples_per_second": 889.72, "eval_steps_per_second": 13.944, "step": 96000 }, { "epoch": 1.08, "learning_rate": 0.00014917401808690116, "loss": 0.2884, "step": 96500 }, { "epoch": 1.08, "learning_rate": 0.0001491561954994793, "loss": 0.2888, "step": 97000 }, { "epoch": 1.08, "eval_loss": 0.27881118655204773, "eval_runtime": 2.5487, "eval_samples_per_second": 901.234, "eval_steps_per_second": 14.125, "step": 97000 }, { "epoch": 1.09, "learning_rate": 0.00014913818384165964, "loss": 0.2882, "step": 97500 }, { "epoch": 1.09, "learning_rate": 0.00014911998316268537, "loss": 0.2878, "step": 98000 }, { "epoch": 1.09, "eval_loss": 0.2754364311695099, "eval_runtime": 2.537, "eval_samples_per_second": 905.405, "eval_steps_per_second": 14.19, "step": 98000 }, { "epoch": 1.1, "learning_rate": 0.00014910159351231653, "loss": 0.2873, "step": 98500 }, { "epoch": 1.1, "learning_rate": 0.00014908301494082963, "loss": 0.2869, "step": 99000 }, { "epoch": 1.1, "eval_loss": 0.2750239074230194, "eval_runtime": 2.5521, "eval_samples_per_second": 900.043, "eval_steps_per_second": 14.106, "step": 99000 }, { "epoch": 1.11, "learning_rate": 0.0001490642474990178, "loss": 0.2865, "step": 99500 }, { "epoch": 1.12, "learning_rate": 0.00014904529123819054, "loss": 0.2863, "step": 100000 }, { "epoch": 1.12, "eval_loss": 0.27601054310798645, "eval_runtime": 2.5872, "eval_samples_per_second": 887.823, "eval_steps_per_second": 13.915, "step": 100000 }, { "epoch": 1.12, "learning_rate": 0.00014902614621017352, "loss": 0.2863, "step": 100500 }, { "epoch": 1.13, "learning_rate": 0.00014900681246730852, "loss": 0.2856, "step": 101000 }, { "epoch": 1.13, "eval_loss": 0.27266713976860046, "eval_runtime": 2.477, "eval_samples_per_second": 927.344, "eval_steps_per_second": 14.534, "step": 101000 }, { "epoch": 1.13, "learning_rate": 0.00014898729006245328, "loss": 0.2853, "step": 101500 }, { "epoch": 1.14, "learning_rate": 0.00014896757904898125, "loss": 0.2852, "step": 102000 }, { "epoch": 1.14, "eval_loss": 0.27444031834602356, "eval_runtime": 2.5666, "eval_samples_per_second": 894.954, "eval_steps_per_second": 14.026, "step": 102000 }, { "epoch": 1.14, "learning_rate": 0.0001489476794807816, "loss": 0.2846, "step": 102500 }, { "epoch": 1.15, "learning_rate": 0.00014892759141225904, "loss": 0.2846, "step": 103000 }, { "epoch": 1.15, "eval_loss": 0.2744878828525543, "eval_runtime": 2.61, "eval_samples_per_second": 880.071, "eval_steps_per_second": 13.793, "step": 103000 }, { "epoch": 1.15, "learning_rate": 0.00014890731489833355, "loss": 0.2838, "step": 103500 }, { "epoch": 1.16, "learning_rate": 0.00014888684999444035, "loss": 0.2838, "step": 104000 }, { "epoch": 1.16, "eval_loss": 0.2741919755935669, "eval_runtime": 2.5544, "eval_samples_per_second": 899.235, "eval_steps_per_second": 14.093, "step": 104000 }, { "epoch": 1.17, "learning_rate": 0.00014886619675652968, "loss": 0.2836, "step": 104500 }, { "epoch": 1.17, "learning_rate": 0.00014884535524106675, "loss": 0.2832, "step": 105000 }, { "epoch": 1.17, "eval_loss": 0.2710511386394501, "eval_runtime": 2.6471, "eval_samples_per_second": 867.747, "eval_steps_per_second": 13.6, "step": 105000 }, { "epoch": 1.18, "learning_rate": 0.00014882432550503144, "loss": 0.2826, "step": 105500 }, { "epoch": 1.18, "learning_rate": 0.00014880310760591824, "loss": 0.2822, "step": 106000 }, { "epoch": 1.18, "eval_loss": 0.2754230499267578, "eval_runtime": 2.637, "eval_samples_per_second": 871.051, "eval_steps_per_second": 13.652, "step": 106000 }, { "epoch": 1.19, "learning_rate": 0.0001487817016017361, "loss": 0.2824, "step": 106500 }, { "epoch": 1.19, "learning_rate": 0.0001487601075510082, "loss": 0.2819, "step": 107000 }, { "epoch": 1.19, "eval_loss": 0.2707637846469879, "eval_runtime": 2.5493, "eval_samples_per_second": 901.018, "eval_steps_per_second": 14.121, "step": 107000 }, { "epoch": 1.2, "learning_rate": 0.00014873832551277186, "loss": 0.2811, "step": 107500 }, { "epoch": 1.2, "learning_rate": 0.0001487163555465783, "loss": 0.2813, "step": 108000 }, { "epoch": 1.2, "eval_loss": 0.27113014459609985, "eval_runtime": 2.5222, "eval_samples_per_second": 910.7, "eval_steps_per_second": 14.273, "step": 108000 }, { "epoch": 1.21, "learning_rate": 0.00014869419771249264, "loss": 0.2815, "step": 108500 }, { "epoch": 1.22, "learning_rate": 0.0001486718520710935, "loss": 0.2807, "step": 109000 }, { "epoch": 1.22, "eval_loss": 0.270040363073349, "eval_runtime": 2.5255, "eval_samples_per_second": 909.511, "eval_steps_per_second": 14.254, "step": 109000 }, { "epoch": 1.22, "learning_rate": 0.00014864931868347302, "loss": 0.2802, "step": 109500 }, { "epoch": 1.23, "learning_rate": 0.00014862659761123663, "loss": 0.2802, "step": 110000 }, { "epoch": 1.23, "eval_loss": 0.26776713132858276, "eval_runtime": 2.447, "eval_samples_per_second": 938.683, "eval_steps_per_second": 14.712, "step": 110000 }, { "epoch": 1.23, "learning_rate": 0.0001486036889165029, "loss": 0.28, "step": 110500 }, { "epoch": 1.24, "learning_rate": 0.00014858059266190327, "loss": 0.2794, "step": 111000 }, { "epoch": 1.24, "eval_loss": 0.2689948081970215, "eval_runtime": 2.4933, "eval_samples_per_second": 921.261, "eval_steps_per_second": 14.439, "step": 111000 }, { "epoch": 1.24, "learning_rate": 0.00014855730891058204, "loss": 0.2795, "step": 111500 }, { "epoch": 1.25, "learning_rate": 0.00014853383772619612, "loss": 0.2791, "step": 112000 }, { "epoch": 1.25, "eval_loss": 0.26764577627182007, "eval_runtime": 2.6324, "eval_samples_per_second": 872.576, "eval_steps_per_second": 13.676, "step": 112000 }, { "epoch": 1.25, "learning_rate": 0.0001485101791729148, "loss": 0.2791, "step": 112500 }, { "epoch": 1.26, "learning_rate": 0.00014848633331541967, "loss": 0.2784, "step": 113000 }, { "epoch": 1.26, "eval_loss": 0.27024123072624207, "eval_runtime": 2.5135, "eval_samples_per_second": 913.848, "eval_steps_per_second": 14.322, "step": 113000 }, { "epoch": 1.27, "learning_rate": 0.00014846230021890443, "loss": 0.2781, "step": 113500 }, { "epoch": 1.27, "learning_rate": 0.0001484380799490746, "loss": 0.278, "step": 114000 }, { "epoch": 1.27, "eval_loss": 0.26657336950302124, "eval_runtime": 2.6002, "eval_samples_per_second": 883.406, "eval_steps_per_second": 13.845, "step": 114000 }, { "epoch": 1.28, "learning_rate": 0.0001484136725721475, "loss": 0.2776, "step": 114500 }, { "epoch": 1.28, "learning_rate": 0.00014838907815485194, "loss": 0.2775, "step": 115000 }, { "epoch": 1.28, "eval_loss": 0.2676675021648407, "eval_runtime": 2.608, "eval_samples_per_second": 880.739, "eval_steps_per_second": 13.803, "step": 115000 }, { "epoch": 1.29, "learning_rate": 0.00014836429676442814, "loss": 0.2774, "step": 115500 }, { "epoch": 1.29, "learning_rate": 0.00014833932846862748, "loss": 0.2767, "step": 116000 }, { "epoch": 1.29, "eval_loss": 0.2658541798591614, "eval_runtime": 2.528, "eval_samples_per_second": 908.623, "eval_steps_per_second": 14.24, "step": 116000 }, { "epoch": 1.3, "learning_rate": 0.0001483141733357123, "loss": 0.2767, "step": 116500 }, { "epoch": 1.3, "learning_rate": 0.00014828883143445582, "loss": 0.2762, "step": 117000 }, { "epoch": 1.3, "eval_loss": 0.2639394998550415, "eval_runtime": 2.5898, "eval_samples_per_second": 886.929, "eval_steps_per_second": 13.9, "step": 117000 }, { "epoch": 1.31, "learning_rate": 0.00014826330283414178, "loss": 0.2774, "step": 117500 }, { "epoch": 1.32, "learning_rate": 0.0001482375876045644, "loss": 0.2753, "step": 118000 }, { "epoch": 1.32, "eval_loss": 0.2651030123233795, "eval_runtime": 2.4743, "eval_samples_per_second": 928.341, "eval_steps_per_second": 14.55, "step": 118000 }, { "epoch": 1.32, "learning_rate": 0.0001482116858160282, "loss": 0.2756, "step": 118500 }, { "epoch": 1.33, "learning_rate": 0.0001481855975393476, "loss": 0.275, "step": 119000 }, { "epoch": 1.33, "eval_loss": 0.2663949429988861, "eval_runtime": 2.5801, "eval_samples_per_second": 890.26, "eval_steps_per_second": 13.953, "step": 119000 }, { "epoch": 1.33, "learning_rate": 0.000148159322845847, "loss": 0.2747, "step": 119500 }, { "epoch": 1.34, "learning_rate": 0.0001481328618073604, "loss": 0.2746, "step": 120000 }, { "epoch": 1.34, "eval_loss": 0.2646242678165436, "eval_runtime": 2.5527, "eval_samples_per_second": 899.828, "eval_steps_per_second": 14.103, "step": 120000 }, { "epoch": 1.34, "learning_rate": 0.00014810621449623125, "loss": 0.2749, "step": 120500 }, { "epoch": 1.35, "learning_rate": 0.0001480793809853123, "loss": 0.2749, "step": 121000 }, { "epoch": 1.35, "eval_loss": 0.26220399141311646, "eval_runtime": 2.518, "eval_samples_per_second": 912.241, "eval_steps_per_second": 14.297, "step": 121000 }, { "epoch": 1.36, "learning_rate": 0.00014805236134796536, "loss": 0.2741, "step": 121500 }, { "epoch": 1.36, "learning_rate": 0.00014802515565806107, "loss": 0.2736, "step": 122000 }, { "epoch": 1.36, "eval_loss": 0.26149359345436096, "eval_runtime": 2.5346, "eval_samples_per_second": 906.265, "eval_steps_per_second": 14.204, "step": 122000 }, { "epoch": 1.37, "learning_rate": 0.00014799776398997873, "loss": 0.2734, "step": 122500 }, { "epoch": 1.37, "learning_rate": 0.00014797018641860612, "loss": 0.2735, "step": 123000 }, { "epoch": 1.37, "eval_loss": 0.2607334852218628, "eval_runtime": 2.5063, "eval_samples_per_second": 916.502, "eval_steps_per_second": 14.364, "step": 123000 }, { "epoch": 1.38, "learning_rate": 0.00014794242301933928, "loss": 0.2736, "step": 123500 }, { "epoch": 1.38, "learning_rate": 0.0001479144738680823, "loss": 0.2729, "step": 124000 }, { "epoch": 1.38, "eval_loss": 0.2600594162940979, "eval_runtime": 2.5363, "eval_samples_per_second": 905.664, "eval_steps_per_second": 14.194, "step": 124000 }, { "epoch": 1.39, "learning_rate": 0.00014788633904124708, "loss": 0.2727, "step": 124500 }, { "epoch": 1.39, "learning_rate": 0.00014785801861575312, "loss": 0.2724, "step": 125000 }, { "epoch": 1.39, "eval_loss": 0.2614007294178009, "eval_runtime": 2.556, "eval_samples_per_second": 898.656, "eval_steps_per_second": 14.084, "step": 125000 }, { "epoch": 1.4, "learning_rate": 0.0001478295126690274, "loss": 0.2723, "step": 125500 }, { "epoch": 1.41, "learning_rate": 0.00014780082127900416, "loss": 0.2721, "step": 126000 }, { "epoch": 1.41, "eval_loss": 0.2622178792953491, "eval_runtime": 2.616, "eval_samples_per_second": 878.045, "eval_steps_per_second": 13.761, "step": 126000 }, { "epoch": 1.41, "learning_rate": 0.0001477719445241245, "loss": 0.2718, "step": 126500 }, { "epoch": 1.42, "learning_rate": 0.00014774288248333635, "loss": 0.2715, "step": 127000 }, { "epoch": 1.42, "eval_loss": 0.2606135308742523, "eval_runtime": 2.4928, "eval_samples_per_second": 921.454, "eval_steps_per_second": 14.442, "step": 127000 }, { "epoch": 1.42, "learning_rate": 0.00014771363523609428, "loss": 0.2715, "step": 127500 }, { "epoch": 1.43, "learning_rate": 0.00014768420286235908, "loss": 0.271, "step": 128000 }, { "epoch": 1.43, "eval_loss": 0.2602754831314087, "eval_runtime": 2.6006, "eval_samples_per_second": 883.262, "eval_steps_per_second": 13.843, "step": 128000 }, { "epoch": 1.43, "learning_rate": 0.0001476545854425978, "loss": 0.2708, "step": 128500 }, { "epoch": 1.44, "learning_rate": 0.00014762478305778328, "loss": 0.2711, "step": 129000 }, { "epoch": 1.44, "eval_loss": 0.25960421562194824, "eval_runtime": 2.5551, "eval_samples_per_second": 898.977, "eval_steps_per_second": 14.089, "step": 129000 }, { "epoch": 1.44, "learning_rate": 0.00014759479578939415, "loss": 0.2704, "step": 129500 }, { "epoch": 1.45, "learning_rate": 0.0001475646237194144, "loss": 0.2704, "step": 130000 }, { "epoch": 1.45, "eval_loss": 0.2571023404598236, "eval_runtime": 2.4929, "eval_samples_per_second": 921.433, "eval_steps_per_second": 14.441, "step": 130000 }, { "epoch": 1.46, "learning_rate": 0.00014753426693033336, "loss": 0.2706, "step": 130500 }, { "epoch": 1.46, "learning_rate": 0.00014750372550514533, "loss": 0.2698, "step": 131000 }, { "epoch": 1.46, "eval_loss": 0.2580989897251129, "eval_runtime": 2.5522, "eval_samples_per_second": 900.002, "eval_steps_per_second": 14.105, "step": 131000 }, { "epoch": 1.47, "learning_rate": 0.00014747299952734937, "loss": 0.2698, "step": 131500 }, { "epoch": 1.47, "learning_rate": 0.0001474420890809492, "loss": 0.2693, "step": 132000 }, { "epoch": 1.47, "eval_loss": 0.25762051343917847, "eval_runtime": 2.6493, "eval_samples_per_second": 867.024, "eval_steps_per_second": 13.589, "step": 132000 }, { "epoch": 1.48, "learning_rate": 0.00014741099425045272, "loss": 0.269, "step": 132500 }, { "epoch": 1.48, "learning_rate": 0.00014737971512087202, "loss": 0.2689, "step": 133000 }, { "epoch": 1.48, "eval_loss": 0.2582535445690155, "eval_runtime": 2.5702, "eval_samples_per_second": 893.707, "eval_steps_per_second": 14.007, "step": 133000 }, { "epoch": 1.49, "learning_rate": 0.00014734825177772313, "loss": 0.2686, "step": 133500 }, { "epoch": 1.49, "learning_rate": 0.00014731660430702552, "loss": 0.2684, "step": 134000 }, { "epoch": 1.49, "eval_loss": 0.2552024722099304, "eval_runtime": 2.5578, "eval_samples_per_second": 898.041, "eval_steps_per_second": 14.075, "step": 134000 }, { "epoch": 1.5, "learning_rate": 0.00014728477279530222, "loss": 0.268, "step": 134500 }, { "epoch": 1.51, "learning_rate": 0.00014725275732957937, "loss": 0.2682, "step": 135000 }, { "epoch": 1.51, "eval_loss": 0.2560090124607086, "eval_runtime": 2.5793, "eval_samples_per_second": 890.566, "eval_steps_per_second": 13.958, "step": 135000 }, { "epoch": 1.51, "learning_rate": 0.000147220557997386, "loss": 0.2678, "step": 135500 }, { "epoch": 1.52, "learning_rate": 0.00014718817488675387, "loss": 0.2673, "step": 136000 }, { "epoch": 1.52, "eval_loss": 0.2554505169391632, "eval_runtime": 2.5602, "eval_samples_per_second": 897.208, "eval_steps_per_second": 14.062, "step": 136000 }, { "epoch": 1.52, "learning_rate": 0.0001471556080862172, "loss": 0.2674, "step": 136500 }, { "epoch": 1.53, "learning_rate": 0.00014712285768481235, "loss": 0.2672, "step": 137000 }, { "epoch": 1.53, "eval_loss": 0.2564171552658081, "eval_runtime": 2.6088, "eval_samples_per_second": 880.495, "eval_steps_per_second": 13.8, "step": 137000 }, { "epoch": 1.53, "learning_rate": 0.00014708992377207767, "loss": 0.2666, "step": 137500 }, { "epoch": 1.54, "learning_rate": 0.00014705680643805323, "loss": 0.2671, "step": 138000 }, { "epoch": 1.54, "eval_loss": 0.2554504871368408, "eval_runtime": 2.607, "eval_samples_per_second": 881.099, "eval_steps_per_second": 13.809, "step": 138000 }, { "epoch": 1.54, "learning_rate": 0.00014702350577328052, "loss": 0.2668, "step": 138500 }, { "epoch": 1.55, "learning_rate": 0.00014699002186880232, "loss": 0.2663, "step": 139000 }, { "epoch": 1.55, "eval_loss": 0.2547665238380432, "eval_runtime": 2.5426, "eval_samples_per_second": 903.392, "eval_steps_per_second": 14.159, "step": 139000 }, { "epoch": 1.56, "learning_rate": 0.00014695635481616235, "loss": 0.2662, "step": 139500 }, { "epoch": 1.56, "learning_rate": 0.00014692250470740503, "loss": 0.2659, "step": 140000 }, { "epoch": 1.56, "eval_loss": 0.25474175810813904, "eval_runtime": 2.5555, "eval_samples_per_second": 898.837, "eval_steps_per_second": 14.087, "step": 140000 }, { "epoch": 1.57, "learning_rate": 0.00014688847163507525, "loss": 0.2656, "step": 140500 }, { "epoch": 1.57, "learning_rate": 0.00014685425569221819, "loss": 0.2662, "step": 141000 }, { "epoch": 1.57, "eval_loss": 0.25465691089630127, "eval_runtime": 2.5777, "eval_samples_per_second": 891.114, "eval_steps_per_second": 13.966, "step": 141000 }, { "epoch": 1.58, "learning_rate": 0.00014681985697237885, "loss": 0.2659, "step": 141500 }, { "epoch": 1.58, "learning_rate": 0.00014678527556960207, "loss": 0.265, "step": 142000 }, { "epoch": 1.58, "eval_loss": 0.25828590989112854, "eval_runtime": 2.5027, "eval_samples_per_second": 917.822, "eval_steps_per_second": 14.385, "step": 142000 }, { "epoch": 1.59, "learning_rate": 0.00014675051157843208, "loss": 0.2651, "step": 142500 }, { "epoch": 1.59, "learning_rate": 0.0001467155650939123, "loss": 0.2645, "step": 143000 }, { "epoch": 1.59, "eval_loss": 0.2552579939365387, "eval_runtime": 2.5709, "eval_samples_per_second": 893.469, "eval_steps_per_second": 14.003, "step": 143000 }, { "epoch": 1.6, "learning_rate": 0.00014668043621158508, "loss": 0.2647, "step": 143500 }, { "epoch": 1.61, "learning_rate": 0.00014664512502749141, "loss": 0.2646, "step": 144000 }, { "epoch": 1.61, "eval_loss": 0.2546204626560211, "eval_runtime": 2.5612, "eval_samples_per_second": 896.85, "eval_steps_per_second": 14.056, "step": 144000 }, { "epoch": 1.61, "learning_rate": 0.00014660963163817077, "loss": 0.2642, "step": 144500 }, { "epoch": 1.62, "learning_rate": 0.00014657395614066075, "loss": 0.2639, "step": 145000 }, { "epoch": 1.62, "eval_loss": 0.251667857170105, "eval_runtime": 2.6211, "eval_samples_per_second": 876.335, "eval_steps_per_second": 13.734, "step": 145000 }, { "epoch": 1.62, "learning_rate": 0.0001465380986324967, "loss": 0.2639, "step": 145500 }, { "epoch": 1.63, "learning_rate": 0.0001465020592117118, "loss": 0.2641, "step": 146000 }, { "epoch": 1.63, "eval_loss": 0.25381070375442505, "eval_runtime": 2.4892, "eval_samples_per_second": 922.778, "eval_steps_per_second": 14.462, "step": 146000 }, { "epoch": 1.63, "learning_rate": 0.00014646583797683636, "loss": 0.2638, "step": 146500 }, { "epoch": 1.64, "learning_rate": 0.0001464294350268979, "loss": 0.2633, "step": 147000 }, { "epoch": 1.64, "eval_loss": 0.2526147961616516, "eval_runtime": 2.5593, "eval_samples_per_second": 897.499, "eval_steps_per_second": 14.066, "step": 147000 }, { "epoch": 1.65, "learning_rate": 0.00014639285046142065, "loss": 0.2635, "step": 147500 }, { "epoch": 1.65, "learning_rate": 0.00014635608438042546, "loss": 0.2632, "step": 148000 }, { "epoch": 1.65, "eval_loss": 0.25286561250686646, "eval_runtime": 2.5815, "eval_samples_per_second": 889.785, "eval_steps_per_second": 13.945, "step": 148000 }, { "epoch": 1.66, "learning_rate": 0.00014631913688442936, "loss": 0.2627, "step": 148500 }, { "epoch": 1.66, "learning_rate": 0.00014628200807444543, "loss": 0.2626, "step": 149000 }, { "epoch": 1.66, "eval_loss": 0.25125652551651, "eval_runtime": 2.5016, "eval_samples_per_second": 918.2, "eval_steps_per_second": 14.391, "step": 149000 }, { "epoch": 1.67, "learning_rate": 0.0001462446980519824, "loss": 0.2626, "step": 149500 }, { "epoch": 1.67, "learning_rate": 0.0001462072069190444, "loss": 0.2619, "step": 150000 }, { "epoch": 1.67, "eval_loss": 0.25238075852394104, "eval_runtime": 2.514, "eval_samples_per_second": 913.666, "eval_steps_per_second": 14.32, "step": 150000 }, { "epoch": 1.68, "learning_rate": 0.00014616953477813085, "loss": 0.2626, "step": 150500 }, { "epoch": 1.68, "learning_rate": 0.00014613168173223585, "loss": 0.2618, "step": 151000 }, { "epoch": 1.68, "eval_loss": 0.25222572684288025, "eval_runtime": 2.5557, "eval_samples_per_second": 898.762, "eval_steps_per_second": 14.086, "step": 151000 }, { "epoch": 1.69, "learning_rate": 0.00014609364788484825, "loss": 0.2617, "step": 151500 }, { "epoch": 1.7, "learning_rate": 0.00014605543333995113, "loss": 0.2615, "step": 152000 }, { "epoch": 1.7, "eval_loss": 0.25088632106781006, "eval_runtime": 2.6299, "eval_samples_per_second": 873.405, "eval_steps_per_second": 13.689, "step": 152000 }, { "epoch": 1.7, "learning_rate": 0.00014601703820202154, "loss": 0.2615, "step": 152500 }, { "epoch": 1.71, "learning_rate": 0.00014597846257603038, "loss": 0.2617, "step": 153000 }, { "epoch": 1.71, "eval_loss": 0.2507455348968506, "eval_runtime": 2.5324, "eval_samples_per_second": 907.05, "eval_steps_per_second": 14.216, "step": 153000 }, { "epoch": 1.71, "learning_rate": 0.00014593970656744194, "loss": 0.261, "step": 153500 }, { "epoch": 1.72, "learning_rate": 0.0001459007702822136, "loss": 0.261, "step": 154000 }, { "epoch": 1.72, "eval_loss": 0.25084322690963745, "eval_runtime": 2.5322, "eval_samples_per_second": 907.132, "eval_steps_per_second": 14.217, "step": 154000 }, { "epoch": 1.72, "learning_rate": 0.00014586165382679577, "loss": 0.2612, "step": 154500 }, { "epoch": 1.73, "learning_rate": 0.00014582235730813128, "loss": 0.2614, "step": 155000 }, { "epoch": 1.73, "eval_loss": 0.2498544454574585, "eval_runtime": 2.5674, "eval_samples_per_second": 894.688, "eval_steps_per_second": 14.022, "step": 155000 }, { "epoch": 1.73, "learning_rate": 0.00014578288083365532, "loss": 0.2603, "step": 155500 }, { "epoch": 1.74, "learning_rate": 0.00014574322451129507, "loss": 0.2602, "step": 156000 }, { "epoch": 1.74, "eval_loss": 0.25135913491249084, "eval_runtime": 2.5422, "eval_samples_per_second": 903.538, "eval_steps_per_second": 14.161, "step": 156000 }, { "epoch": 1.75, "learning_rate": 0.00014570338844946943, "loss": 0.2603, "step": 156500 }, { "epoch": 1.75, "learning_rate": 0.00014566337275708863, "loss": 0.2601, "step": 157000 }, { "epoch": 1.75, "eval_loss": 0.24927592277526855, "eval_runtime": 2.5955, "eval_samples_per_second": 884.984, "eval_steps_per_second": 13.87, "step": 157000 }, { "epoch": 1.76, "learning_rate": 0.00014562317754355405, "loss": 0.26, "step": 157500 }, { "epoch": 1.76, "learning_rate": 0.0001455828029187579, "loss": 0.2599, "step": 158000 }, { "epoch": 1.76, "eval_loss": 0.2505401074886322, "eval_runtime": 2.6067, "eval_samples_per_second": 881.183, "eval_steps_per_second": 13.81, "step": 158000 }, { "epoch": 1.77, "learning_rate": 0.00014554224899308285, "loss": 0.2595, "step": 158500 }, { "epoch": 1.77, "learning_rate": 0.00014550151587740178, "loss": 0.2595, "step": 159000 }, { "epoch": 1.77, "eval_loss": 0.24686762690544128, "eval_runtime": 2.5852, "eval_samples_per_second": 888.524, "eval_steps_per_second": 13.925, "step": 159000 }, { "epoch": 1.78, "learning_rate": 0.00014546060368307744, "loss": 0.2589, "step": 159500 }, { "epoch": 1.78, "learning_rate": 0.00014541951252196225, "loss": 0.259, "step": 160000 }, { "epoch": 1.78, "eval_loss": 0.248440220952034, "eval_runtime": 2.56, "eval_samples_per_second": 897.273, "eval_steps_per_second": 14.063, "step": 160000 }, { "epoch": 1.79, "learning_rate": 0.00014537824250639785, "loss": 0.2589, "step": 160500 }, { "epoch": 1.8, "learning_rate": 0.00014533679374921493, "loss": 0.259, "step": 161000 }, { "epoch": 1.8, "eval_loss": 0.24788442254066467, "eval_runtime": 2.5373, "eval_samples_per_second": 905.276, "eval_steps_per_second": 14.188, "step": 161000 }, { "epoch": 1.8, "learning_rate": 0.00014529516636373275, "loss": 0.2581, "step": 161500 }, { "epoch": 1.81, "learning_rate": 0.00014525336046375905, "loss": 0.2582, "step": 162000 }, { "epoch": 1.81, "eval_loss": 0.2487788200378418, "eval_runtime": 2.5002, "eval_samples_per_second": 918.721, "eval_steps_per_second": 14.399, "step": 162000 }, { "epoch": 1.81, "learning_rate": 0.00014521137616358952, "loss": 0.2582, "step": 162500 }, { "epoch": 1.82, "learning_rate": 0.00014516921357800766, "loss": 0.2582, "step": 163000 }, { "epoch": 1.82, "eval_loss": 0.24880746006965637, "eval_runtime": 2.5819, "eval_samples_per_second": 889.642, "eval_steps_per_second": 13.943, "step": 163000 }, { "epoch": 1.82, "learning_rate": 0.00014512687282228432, "loss": 0.258, "step": 163500 }, { "epoch": 1.83, "learning_rate": 0.00014508435401217759, "loss": 0.2572, "step": 164000 }, { "epoch": 1.83, "eval_loss": 0.2471989244222641, "eval_runtime": 2.5023, "eval_samples_per_second": 917.96, "eval_steps_per_second": 14.387, "step": 164000 }, { "epoch": 1.83, "learning_rate": 0.0001450416572639322, "loss": 0.2572, "step": 164500 }, { "epoch": 1.84, "learning_rate": 0.00014499878269427948, "loss": 0.2571, "step": 165000 }, { "epoch": 1.84, "eval_loss": 0.24864718317985535, "eval_runtime": 2.5833, "eval_samples_per_second": 889.167, "eval_steps_per_second": 13.936, "step": 165000 }, { "epoch": 1.85, "learning_rate": 0.00014495573042043683, "loss": 0.2571, "step": 165500 }, { "epoch": 1.85, "learning_rate": 0.00014491250056010758, "loss": 0.2568, "step": 166000 }, { "epoch": 1.85, "eval_loss": 0.24337711930274963, "eval_runtime": 2.518, "eval_samples_per_second": 912.24, "eval_steps_per_second": 14.297, "step": 166000 }, { "epoch": 1.86, "learning_rate": 0.00014486909323148042, "loss": 0.257, "step": 166500 }, { "epoch": 1.86, "learning_rate": 0.00014482550855322943, "loss": 0.257, "step": 167000 }, { "epoch": 1.86, "eval_loss": 0.24700835347175598, "eval_runtime": 2.5407, "eval_samples_per_second": 904.072, "eval_steps_per_second": 14.169, "step": 167000 }, { "epoch": 1.87, "learning_rate": 0.00014478174664451338, "loss": 0.2565, "step": 167500 }, { "epoch": 1.87, "learning_rate": 0.0001447378076249757, "loss": 0.2559, "step": 168000 }, { "epoch": 1.87, "eval_loss": 0.24551132321357727, "eval_runtime": 2.5565, "eval_samples_per_second": 898.491, "eval_steps_per_second": 14.082, "step": 168000 }, { "epoch": 1.88, "learning_rate": 0.00014469369161474398, "loss": 0.2563, "step": 168500 }, { "epoch": 1.88, "learning_rate": 0.00014464939873442973, "loss": 0.2558, "step": 169000 }, { "epoch": 1.88, "eval_loss": 0.2444145828485489, "eval_runtime": 2.5541, "eval_samples_per_second": 899.348, "eval_steps_per_second": 14.095, "step": 169000 }, { "epoch": 1.89, "learning_rate": 0.00014460492910512793, "loss": 0.2558, "step": 169500 }, { "epoch": 1.9, "learning_rate": 0.00014456028284841693, "loss": 0.2558, "step": 170000 }, { "epoch": 1.9, "eval_loss": 0.24412675201892853, "eval_runtime": 2.5587, "eval_samples_per_second": 897.729, "eval_steps_per_second": 14.07, "step": 170000 }, { "epoch": 1.9, "learning_rate": 0.00014451546008635783, "loss": 0.2557, "step": 170500 }, { "epoch": 1.91, "learning_rate": 0.00014447046094149437, "loss": 0.2559, "step": 171000 }, { "epoch": 1.91, "eval_loss": 0.24562311172485352, "eval_runtime": 2.585, "eval_samples_per_second": 888.573, "eval_steps_per_second": 13.926, "step": 171000 }, { "epoch": 1.91, "learning_rate": 0.0001444252855368525, "loss": 0.2556, "step": 171500 }, { "epoch": 1.92, "learning_rate": 0.00014437993399594003, "loss": 0.2554, "step": 172000 }, { "epoch": 1.92, "eval_loss": 0.24504482746124268, "eval_runtime": 2.5424, "eval_samples_per_second": 903.478, "eval_steps_per_second": 14.16, "step": 172000 }, { "epoch": 1.92, "learning_rate": 0.00014433440644274635, "loss": 0.2552, "step": 172500 }, { "epoch": 1.93, "learning_rate": 0.0001442887030017421, "loss": 0.2552, "step": 173000 }, { "epoch": 1.93, "eval_loss": 0.24396482110023499, "eval_runtime": 2.5384, "eval_samples_per_second": 904.888, "eval_steps_per_second": 14.182, "step": 173000 }, { "epoch": 1.94, "learning_rate": 0.00014424282379787865, "loss": 0.255, "step": 173500 }, { "epoch": 1.94, "learning_rate": 0.00014419676895658807, "loss": 0.255, "step": 174000 }, { "epoch": 1.94, "eval_loss": 0.2440991997718811, "eval_runtime": 2.6023, "eval_samples_per_second": 882.671, "eval_steps_per_second": 13.834, "step": 174000 }, { "epoch": 1.95, "learning_rate": 0.00014415053860378254, "loss": 0.2551, "step": 174500 }, { "epoch": 1.95, "learning_rate": 0.000144104132865854, "loss": 0.2544, "step": 175000 }, { "epoch": 1.95, "eval_loss": 0.24473923444747925, "eval_runtime": 2.5664, "eval_samples_per_second": 895.019, "eval_steps_per_second": 14.027, "step": 175000 }, { "epoch": 1.96, "learning_rate": 0.00014405755186967404, "loss": 0.2542, "step": 175500 }, { "epoch": 1.96, "learning_rate": 0.0001440107957425933, "loss": 0.2544, "step": 176000 }, { "epoch": 1.96, "eval_loss": 0.24544279277324677, "eval_runtime": 2.5371, "eval_samples_per_second": 905.36, "eval_steps_per_second": 14.189, "step": 176000 }, { "epoch": 1.97, "learning_rate": 0.00014396386461244123, "loss": 0.2541, "step": 176500 }, { "epoch": 1.97, "learning_rate": 0.0001439167586075258, "loss": 0.2539, "step": 177000 }, { "epoch": 1.97, "eval_loss": 0.24276654422283173, "eval_runtime": 2.5385, "eval_samples_per_second": 904.868, "eval_steps_per_second": 14.182, "step": 177000 }, { "epoch": 1.98, "learning_rate": 0.00014386947785663293, "loss": 0.2538, "step": 177500 }, { "epoch": 1.99, "learning_rate": 0.0001438220224890265, "loss": 0.2537, "step": 178000 }, { "epoch": 1.99, "eval_loss": 0.24593636393547058, "eval_runtime": 2.5426, "eval_samples_per_second": 903.395, "eval_steps_per_second": 14.159, "step": 178000 }, { "epoch": 1.99, "learning_rate": 0.00014377439263444763, "loss": 0.2535, "step": 178500 }, { "epoch": 2.0, "learning_rate": 0.00014372658842311449, "loss": 0.2536, "step": 179000 }, { "epoch": 2.0, "eval_loss": 0.2417013943195343, "eval_runtime": 2.5938, "eval_samples_per_second": 885.579, "eval_steps_per_second": 13.879, "step": 179000 }, { "epoch": 2.0, "learning_rate": 0.00014367860998572198, "loss": 0.2535, "step": 179500 }, { "epoch": 2.01, "learning_rate": 0.00014363045745344137, "loss": 0.2532, "step": 180000 }, { "epoch": 2.01, "eval_loss": 0.24318550527095795, "eval_runtime": 2.633, "eval_samples_per_second": 872.381, "eval_steps_per_second": 13.672, "step": 180000 }, { "epoch": 2.01, "learning_rate": 0.00014358213095791978, "loss": 0.2525, "step": 180500 }, { "epoch": 2.02, "learning_rate": 0.00014353363063128005, "loss": 0.2529, "step": 181000 }, { "epoch": 2.02, "eval_loss": 0.24333837628364563, "eval_runtime": 2.6257, "eval_samples_per_second": 874.816, "eval_steps_per_second": 13.711, "step": 181000 }, { "epoch": 2.02, "learning_rate": 0.00014348495660612015, "loss": 0.2526, "step": 181500 }, { "epoch": 2.03, "learning_rate": 0.0001434361090155131, "loss": 0.2523, "step": 182000 }, { "epoch": 2.03, "eval_loss": 0.2430492788553238, "eval_runtime": 2.7075, "eval_samples_per_second": 848.394, "eval_steps_per_second": 13.297, "step": 182000 }, { "epoch": 2.04, "learning_rate": 0.00014338708799300633, "loss": 0.2528, "step": 182500 }, { "epoch": 2.04, "learning_rate": 0.00014333789367262136, "loss": 0.252, "step": 183000 }, { "epoch": 2.04, "eval_loss": 0.2429981827735901, "eval_runtime": 2.6505, "eval_samples_per_second": 866.622, "eval_steps_per_second": 13.582, "step": 183000 }, { "epoch": 2.05, "learning_rate": 0.00014328852618885365, "loss": 0.252, "step": 183500 }, { "epoch": 2.05, "learning_rate": 0.00014323898567667202, "loss": 0.2517, "step": 184000 }, { "epoch": 2.05, "eval_loss": 0.24395941197872162, "eval_runtime": 2.6201, "eval_samples_per_second": 876.688, "eval_steps_per_second": 13.74, "step": 184000 }, { "epoch": 2.06, "learning_rate": 0.00014318927227151832, "loss": 0.252, "step": 184500 }, { "epoch": 2.06, "learning_rate": 0.00014313938610930712, "loss": 0.2518, "step": 185000 }, { "epoch": 2.06, "eval_loss": 0.23878329992294312, "eval_runtime": 2.6492, "eval_samples_per_second": 867.063, "eval_steps_per_second": 13.589, "step": 185000 }, { "epoch": 2.07, "learning_rate": 0.00014308932732642524, "loss": 0.2515, "step": 185500 }, { "epoch": 2.07, "learning_rate": 0.00014303909605973154, "loss": 0.2514, "step": 186000 }, { "epoch": 2.07, "eval_loss": 0.2405225932598114, "eval_runtime": 2.5624, "eval_samples_per_second": 896.419, "eval_steps_per_second": 14.049, "step": 186000 }, { "epoch": 2.08, "learning_rate": 0.00014298869244655632, "loss": 0.2514, "step": 186500 }, { "epoch": 2.09, "learning_rate": 0.0001429381166247012, "loss": 0.2509, "step": 187000 }, { "epoch": 2.09, "eval_loss": 0.2413066029548645, "eval_runtime": 2.6728, "eval_samples_per_second": 859.401, "eval_steps_per_second": 13.469, "step": 187000 }, { "epoch": 2.09, "learning_rate": 0.0001428873687324385, "loss": 0.251, "step": 187500 }, { "epoch": 2.1, "learning_rate": 0.00014283644890851103, "loss": 0.2513, "step": 188000 }, { "epoch": 2.1, "eval_loss": 0.24260124564170837, "eval_runtime": 2.6325, "eval_samples_per_second": 872.57, "eval_steps_per_second": 13.675, "step": 188000 }, { "epoch": 2.1, "learning_rate": 0.00014278535729213168, "loss": 0.251, "step": 188500 }, { "epoch": 2.11, "learning_rate": 0.00014273409402298291, "loss": 0.2508, "step": 189000 }, { "epoch": 2.11, "eval_loss": 0.24098709225654602, "eval_runtime": 2.7201, "eval_samples_per_second": 844.45, "eval_steps_per_second": 13.235, "step": 189000 }, { "epoch": 2.11, "learning_rate": 0.0001426826592412166, "loss": 0.2508, "step": 189500 }, { "epoch": 2.12, "learning_rate": 0.00014263105308745343, "loss": 0.251, "step": 190000 }, { "epoch": 2.12, "eval_loss": 0.2388133704662323, "eval_runtime": 2.7019, "eval_samples_per_second": 850.14, "eval_steps_per_second": 13.324, "step": 190000 }, { "epoch": 2.12, "learning_rate": 0.0001425792757027827, "loss": 0.2505, "step": 190500 }, { "epoch": 2.13, "learning_rate": 0.00014252732722876176, "loss": 0.2503, "step": 191000 }, { "epoch": 2.13, "eval_loss": 0.24010741710662842, "eval_runtime": 2.646, "eval_samples_per_second": 868.09, "eval_steps_per_second": 13.605, "step": 191000 }, { "epoch": 2.14, "learning_rate": 0.00014247520780741581, "loss": 0.2502, "step": 191500 }, { "epoch": 2.14, "learning_rate": 0.0001424229175812373, "loss": 0.25, "step": 192000 }, { "epoch": 2.14, "eval_loss": 0.24023786187171936, "eval_runtime": 2.599, "eval_samples_per_second": 883.799, "eval_steps_per_second": 13.851, "step": 192000 }, { "epoch": 2.15, "learning_rate": 0.00014237045669318574, "loss": 0.2497, "step": 192500 }, { "epoch": 2.15, "learning_rate": 0.00014231782528668717, "loss": 0.2496, "step": 193000 }, { "epoch": 2.15, "eval_loss": 0.24067841470241547, "eval_runtime": 2.6221, "eval_samples_per_second": 876.005, "eval_steps_per_second": 13.729, "step": 193000 }, { "epoch": 2.16, "learning_rate": 0.00014226502350563392, "loss": 0.2495, "step": 193500 }, { "epoch": 2.16, "learning_rate": 0.00014221205149438394, "loss": 0.2498, "step": 194000 }, { "epoch": 2.16, "eval_loss": 0.23946070671081543, "eval_runtime": 2.679, "eval_samples_per_second": 857.416, "eval_steps_per_second": 13.438, "step": 194000 }, { "epoch": 2.17, "learning_rate": 0.00014215890939776074, "loss": 0.2494, "step": 194500 }, { "epoch": 2.17, "learning_rate": 0.0001421055973610528, "loss": 0.2494, "step": 195000 }, { "epoch": 2.17, "eval_loss": 0.2386200875043869, "eval_runtime": 2.6792, "eval_samples_per_second": 857.337, "eval_steps_per_second": 13.437, "step": 195000 }, { "epoch": 2.18, "learning_rate": 0.00014205211553001317, "loss": 0.2493, "step": 195500 }, { "epoch": 2.19, "learning_rate": 0.00014199846405085913, "loss": 0.2488, "step": 196000 }, { "epoch": 2.19, "eval_loss": 0.2384871244430542, "eval_runtime": 2.7009, "eval_samples_per_second": 850.466, "eval_steps_per_second": 13.329, "step": 196000 }, { "epoch": 2.19, "learning_rate": 0.00014194464307027178, "loss": 0.2488, "step": 196500 }, { "epoch": 2.2, "learning_rate": 0.00014189065273539564, "loss": 0.249, "step": 197000 }, { "epoch": 2.2, "eval_loss": 0.23911140859127045, "eval_runtime": 2.6027, "eval_samples_per_second": 882.552, "eval_steps_per_second": 13.832, "step": 197000 }, { "epoch": 2.2, "learning_rate": 0.0001418364931938382, "loss": 0.2488, "step": 197500 }, { "epoch": 2.21, "learning_rate": 0.00014178216459366958, "loss": 0.2483, "step": 198000 }, { "epoch": 2.21, "eval_loss": 0.2387627363204956, "eval_runtime": 2.5383, "eval_samples_per_second": 904.933, "eval_steps_per_second": 14.183, "step": 198000 }, { "epoch": 2.21, "learning_rate": 0.0001417276670834221, "loss": 0.2486, "step": 198500 }, { "epoch": 2.22, "learning_rate": 0.00014167300081208988, "loss": 0.2486, "step": 199000 }, { "epoch": 2.22, "eval_loss": 0.23652148246765137, "eval_runtime": 2.5882, "eval_samples_per_second": 887.478, "eval_steps_per_second": 13.909, "step": 199000 }, { "epoch": 2.22, "learning_rate": 0.00014161816592912844, "loss": 0.2484, "step": 199500 }, { "epoch": 2.23, "learning_rate": 0.00014156316258445421, "loss": 0.2484, "step": 200000 }, { "epoch": 2.23, "eval_loss": 0.23854629695415497, "eval_runtime": 2.5755, "eval_samples_per_second": 891.852, "eval_steps_per_second": 13.978, "step": 200000 }, { "epoch": 2.24, "learning_rate": 0.00014150799092844428, "loss": 0.2482, "step": 200500 }, { "epoch": 2.24, "learning_rate": 0.00014145265111193583, "loss": 0.2485, "step": 201000 }, { "epoch": 2.24, "eval_loss": 0.2385808229446411, "eval_runtime": 2.6672, "eval_samples_per_second": 861.187, "eval_steps_per_second": 13.497, "step": 201000 }, { "epoch": 2.25, "learning_rate": 0.0001413971432862258, "loss": 0.2474, "step": 201500 }, { "epoch": 2.25, "learning_rate": 0.00014134146760307043, "loss": 0.2478, "step": 202000 }, { "epoch": 2.25, "eval_loss": 0.23785722255706787, "eval_runtime": 2.5981, "eval_samples_per_second": 884.094, "eval_steps_per_second": 13.856, "step": 202000 }, { "epoch": 2.26, "learning_rate": 0.000141285624214685, "loss": 0.2475, "step": 202500 }, { "epoch": 2.26, "learning_rate": 0.00014122961327374313, "loss": 0.2477, "step": 203000 }, { "epoch": 2.26, "eval_loss": 0.23697885870933533, "eval_runtime": 2.6667, "eval_samples_per_second": 861.349, "eval_steps_per_second": 13.5, "step": 203000 }, { "epoch": 2.27, "learning_rate": 0.0001411734349333766, "loss": 0.2472, "step": 203500 }, { "epoch": 2.28, "learning_rate": 0.0001411170893471749, "loss": 0.2475, "step": 204000 }, { "epoch": 2.28, "eval_loss": 0.23942548036575317, "eval_runtime": 2.5207, "eval_samples_per_second": 911.252, "eval_steps_per_second": 14.282, "step": 204000 }, { "epoch": 2.28, "learning_rate": 0.00014106057666918466, "loss": 0.2471, "step": 204500 }, { "epoch": 2.29, "learning_rate": 0.00014100389705390938, "loss": 0.2473, "step": 205000 }, { "epoch": 2.29, "eval_loss": 0.23825885355472565, "eval_runtime": 2.5296, "eval_samples_per_second": 908.049, "eval_steps_per_second": 14.232, "step": 205000 }, { "epoch": 2.29, "learning_rate": 0.00014094705065630903, "loss": 0.247, "step": 205500 }, { "epoch": 2.3, "learning_rate": 0.0001408900376317994, "loss": 0.247, "step": 206000 }, { "epoch": 2.3, "eval_loss": 0.2336592674255371, "eval_runtime": 2.6942, "eval_samples_per_second": 852.576, "eval_steps_per_second": 13.362, "step": 206000 }, { "epoch": 2.3, "learning_rate": 0.00014083285813625197, "loss": 0.2466, "step": 206500 }, { "epoch": 2.31, "learning_rate": 0.0001407755123259933, "loss": 0.2464, "step": 207000 }, { "epoch": 2.31, "eval_loss": 0.2362833321094513, "eval_runtime": 2.6517, "eval_samples_per_second": 866.23, "eval_steps_per_second": 13.576, "step": 207000 }, { "epoch": 2.31, "learning_rate": 0.00014071800035780465, "loss": 0.2465, "step": 207500 }, { "epoch": 2.32, "learning_rate": 0.00014066032238892152, "loss": 0.2484, "step": 208000 }, { "epoch": 2.32, "eval_loss": 0.23411661386489868, "eval_runtime": 2.6833, "eval_samples_per_second": 856.047, "eval_steps_per_second": 13.416, "step": 208000 }, { "epoch": 2.33, "learning_rate": 0.0001406024785770333, "loss": 0.2461, "step": 208500 }, { "epoch": 2.33, "learning_rate": 0.00014054446908028272, "loss": 0.2461, "step": 209000 }, { "epoch": 2.33, "eval_loss": 0.23610949516296387, "eval_runtime": 2.5904, "eval_samples_per_second": 886.74, "eval_steps_per_second": 13.898, "step": 209000 }, { "epoch": 2.34, "learning_rate": 0.0001404862940572656, "loss": 0.2457, "step": 209500 }, { "epoch": 2.34, "learning_rate": 0.00014042795366703018, "loss": 0.2457, "step": 210000 }, { "epoch": 2.34, "eval_loss": 0.236760213971138, "eval_runtime": 2.7246, "eval_samples_per_second": 843.05, "eval_steps_per_second": 13.213, "step": 210000 }, { "epoch": 2.35, "learning_rate": 0.00014036944806907685, "loss": 0.2457, "step": 210500 }, { "epoch": 2.35, "learning_rate": 0.0001403107774233577, "loss": 0.2464, "step": 211000 }, { "epoch": 2.35, "eval_loss": 0.23806537687778473, "eval_runtime": 2.6593, "eval_samples_per_second": 863.775, "eval_steps_per_second": 13.538, "step": 211000 }, { "epoch": 2.36, "learning_rate": 0.00014025194189027604, "loss": 0.2456, "step": 211500 }, { "epoch": 2.36, "learning_rate": 0.00014019294163068597, "loss": 0.2456, "step": 212000 }, { "epoch": 2.36, "eval_loss": 0.2328302264213562, "eval_runtime": 2.5601, "eval_samples_per_second": 897.245, "eval_steps_per_second": 14.062, "step": 212000 }, { "epoch": 2.37, "learning_rate": 0.00014013377680589196, "loss": 0.2452, "step": 212500 }, { "epoch": 2.38, "learning_rate": 0.00014007444757764835, "loss": 0.2457, "step": 213000 }, { "epoch": 2.38, "eval_loss": 0.236920565366745, "eval_runtime": 2.6122, "eval_samples_per_second": 879.347, "eval_steps_per_second": 13.782, "step": 213000 }, { "epoch": 2.38, "learning_rate": 0.000140014954108159, "loss": 0.2455, "step": 213500 }, { "epoch": 2.39, "learning_rate": 0.0001399552965600768, "loss": 0.2449, "step": 214000 }, { "epoch": 2.39, "eval_loss": 0.23769885301589966, "eval_runtime": 2.6992, "eval_samples_per_second": 850.998, "eval_steps_per_second": 13.337, "step": 214000 }, { "epoch": 2.39, "learning_rate": 0.00013989547509650314, "loss": 0.2447, "step": 214500 }, { "epoch": 2.4, "learning_rate": 0.0001398354898809877, "loss": 0.2447, "step": 215000 }, { "epoch": 2.4, "eval_loss": 0.23445510864257812, "eval_runtime": 2.6684, "eval_samples_per_second": 860.825, "eval_steps_per_second": 13.491, "step": 215000 }, { "epoch": 2.4, "learning_rate": 0.00013977534107752776, "loss": 0.2448, "step": 215500 }, { "epoch": 2.41, "learning_rate": 0.0001397150288505678, "loss": 0.2446, "step": 216000 }, { "epoch": 2.41, "eval_loss": 0.23636819422245026, "eval_runtime": 2.5563, "eval_samples_per_second": 898.582, "eval_steps_per_second": 14.083, "step": 216000 }, { "epoch": 2.41, "learning_rate": 0.0001396545533649992, "loss": 0.2448, "step": 216500 }, { "epoch": 2.42, "learning_rate": 0.00013959391478615959, "loss": 0.2445, "step": 217000 }, { "epoch": 2.42, "eval_loss": 0.23241940140724182, "eval_runtime": 2.6771, "eval_samples_per_second": 858.01, "eval_steps_per_second": 13.447, "step": 217000 }, { "epoch": 2.43, "learning_rate": 0.00013953311327983254, "loss": 0.2445, "step": 217500 }, { "epoch": 2.43, "learning_rate": 0.00013947214901224706, "loss": 0.2442, "step": 218000 }, { "epoch": 2.43, "eval_loss": 0.23456549644470215, "eval_runtime": 2.606, "eval_samples_per_second": 881.439, "eval_steps_per_second": 13.814, "step": 218000 }, { "epoch": 2.44, "learning_rate": 0.0001394110221500771, "loss": 0.2444, "step": 218500 }, { "epoch": 2.44, "learning_rate": 0.0001393497328604412, "loss": 0.2445, "step": 219000 }, { "epoch": 2.44, "eval_loss": 0.23404227197170258, "eval_runtime": 2.5738, "eval_samples_per_second": 892.457, "eval_steps_per_second": 13.987, "step": 219000 }, { "epoch": 2.45, "learning_rate": 0.00013928828131090193, "loss": 0.2442, "step": 219500 }, { "epoch": 2.45, "learning_rate": 0.00013922666766946545, "loss": 0.2435, "step": 220000 }, { "epoch": 2.45, "eval_loss": 0.2341037541627884, "eval_runtime": 2.6129, "eval_samples_per_second": 879.091, "eval_steps_per_second": 13.778, "step": 220000 }, { "epoch": 2.46, "learning_rate": 0.00013916489210458118, "loss": 0.2439, "step": 220500 }, { "epoch": 2.46, "learning_rate": 0.00013910295478514106, "loss": 0.2433, "step": 221000 }, { "epoch": 2.46, "eval_loss": 0.23220616579055786, "eval_runtime": 2.6144, "eval_samples_per_second": 878.61, "eval_steps_per_second": 13.77, "step": 221000 }, { "epoch": 2.47, "learning_rate": 0.0001390408558804794, "loss": 0.2436, "step": 221500 }, { "epoch": 2.48, "learning_rate": 0.0001389785955603722, "loss": 0.2435, "step": 222000 }, { "epoch": 2.48, "eval_loss": 0.23245947062969208, "eval_runtime": 2.5605, "eval_samples_per_second": 897.091, "eval_steps_per_second": 14.06, "step": 222000 }, { "epoch": 2.48, "learning_rate": 0.00013891617399503688, "loss": 0.2437, "step": 222500 }, { "epoch": 2.49, "learning_rate": 0.00013885359135513154, "loss": 0.2434, "step": 223000 }, { "epoch": 2.49, "eval_loss": 0.23385584354400635, "eval_runtime": 2.5799, "eval_samples_per_second": 890.337, "eval_steps_per_second": 13.954, "step": 223000 }, { "epoch": 2.49, "learning_rate": 0.00013879084781175476, "loss": 0.2432, "step": 223500 }, { "epoch": 2.5, "learning_rate": 0.000138727943536445, "loss": 0.243, "step": 224000 }, { "epoch": 2.5, "eval_loss": 0.2319779098033905, "eval_runtime": 2.5676, "eval_samples_per_second": 894.613, "eval_steps_per_second": 14.021, "step": 224000 }, { "epoch": 2.5, "learning_rate": 0.0001386648787011801, "loss": 0.2428, "step": 224500 }, { "epoch": 2.51, "learning_rate": 0.00013860165347837698, "loss": 0.2428, "step": 225000 }, { "epoch": 2.51, "eval_loss": 0.23255208134651184, "eval_runtime": 2.611, "eval_samples_per_second": 879.75, "eval_steps_per_second": 13.788, "step": 225000 }, { "epoch": 2.51, "learning_rate": 0.00013853826804089095, "loss": 0.2426, "step": 225500 }, { "epoch": 2.52, "learning_rate": 0.00013847472256201535, "loss": 0.243, "step": 226000 }, { "epoch": 2.52, "eval_loss": 0.23200708627700806, "eval_runtime": 2.619, "eval_samples_per_second": 877.053, "eval_steps_per_second": 13.746, "step": 226000 }, { "epoch": 2.53, "learning_rate": 0.00013841101721548112, "loss": 0.2419, "step": 226500 }, { "epoch": 2.53, "learning_rate": 0.00013834715217545625, "loss": 0.2428, "step": 227000 }, { "epoch": 2.53, "eval_loss": 0.23063500225543976, "eval_runtime": 2.5764, "eval_samples_per_second": 891.542, "eval_steps_per_second": 13.973, "step": 227000 }, { "epoch": 2.54, "learning_rate": 0.00013828312761654532, "loss": 0.2424, "step": 227500 }, { "epoch": 2.54, "learning_rate": 0.000138218943713789, "loss": 0.2423, "step": 228000 }, { "epoch": 2.54, "eval_loss": 0.23263856768608093, "eval_runtime": 2.6433, "eval_samples_per_second": 868.989, "eval_steps_per_second": 13.619, "step": 228000 }, { "epoch": 2.55, "learning_rate": 0.00013815460064266368, "loss": 0.2425, "step": 228500 }, { "epoch": 2.55, "learning_rate": 0.0001380900985790808, "loss": 0.2421, "step": 229000 }, { "epoch": 2.55, "eval_loss": 0.2327061891555786, "eval_runtime": 2.6197, "eval_samples_per_second": 876.814, "eval_steps_per_second": 13.742, "step": 229000 }, { "epoch": 2.56, "learning_rate": 0.0001380254376993866, "loss": 0.242, "step": 229500 }, { "epoch": 2.57, "learning_rate": 0.00013796061818036138, "loss": 0.2417, "step": 230000 }, { "epoch": 2.57, "eval_loss": 0.23025751113891602, "eval_runtime": 2.6113, "eval_samples_per_second": 879.626, "eval_steps_per_second": 13.786, "step": 230000 }, { "epoch": 2.57, "learning_rate": 0.00013789564019921931, "loss": 0.2418, "step": 230500 }, { "epoch": 2.58, "learning_rate": 0.00013783050393360768, "loss": 0.2418, "step": 231000 }, { "epoch": 2.58, "eval_loss": 0.23109453916549683, "eval_runtime": 2.644, "eval_samples_per_second": 868.746, "eval_steps_per_second": 13.616, "step": 231000 }, { "epoch": 2.58, "learning_rate": 0.00013776520956160655, "loss": 0.2417, "step": 231500 }, { "epoch": 2.59, "learning_rate": 0.0001376997572617282, "loss": 0.2414, "step": 232000 }, { "epoch": 2.59, "eval_loss": 0.23072299361228943, "eval_runtime": 2.7341, "eval_samples_per_second": 840.132, "eval_steps_per_second": 13.167, "step": 232000 }, { "epoch": 2.59, "learning_rate": 0.0001376341472129168, "loss": 0.2414, "step": 232500 }, { "epoch": 2.6, "learning_rate": 0.00013756837959454766, "loss": 0.2415, "step": 233000 }, { "epoch": 2.6, "eval_loss": 0.22945719957351685, "eval_runtime": 2.6629, "eval_samples_per_second": 862.583, "eval_steps_per_second": 13.519, "step": 233000 }, { "epoch": 2.6, "learning_rate": 0.00013750245458642692, "loss": 0.2411, "step": 233500 }, { "epoch": 2.61, "learning_rate": 0.0001374363723687911, "loss": 0.2411, "step": 234000 }, { "epoch": 2.61, "eval_loss": 0.22973643243312836, "eval_runtime": 2.6682, "eval_samples_per_second": 860.868, "eval_steps_per_second": 13.492, "step": 234000 }, { "epoch": 2.62, "learning_rate": 0.0001373701331223064, "loss": 0.2409, "step": 234500 }, { "epoch": 2.62, "learning_rate": 0.00013730373702806846, "loss": 0.241, "step": 235000 }, { "epoch": 2.62, "eval_loss": 0.23086461424827576, "eval_runtime": 2.643, "eval_samples_per_second": 869.097, "eval_steps_per_second": 13.621, "step": 235000 }, { "epoch": 2.63, "learning_rate": 0.0001372371842676016, "loss": 0.2408, "step": 235500 }, { "epoch": 2.63, "learning_rate": 0.00013717047502285855, "loss": 0.2411, "step": 236000 }, { "epoch": 2.63, "eval_loss": 0.22972989082336426, "eval_runtime": 2.6826, "eval_samples_per_second": 856.274, "eval_steps_per_second": 13.42, "step": 236000 }, { "epoch": 2.64, "learning_rate": 0.0001371036094762198, "loss": 0.2407, "step": 236500 }, { "epoch": 2.64, "learning_rate": 0.0001370365878104933, "loss": 0.241, "step": 237000 }, { "epoch": 2.64, "eval_loss": 0.2288552075624466, "eval_runtime": 2.7019, "eval_samples_per_second": 850.158, "eval_steps_per_second": 13.324, "step": 237000 }, { "epoch": 2.65, "learning_rate": 0.00013696941020891363, "loss": 0.2406, "step": 237500 }, { "epoch": 2.65, "learning_rate": 0.00013690207685514185, "loss": 0.2402, "step": 238000 }, { "epoch": 2.65, "eval_loss": 0.2302161604166031, "eval_runtime": 2.6823, "eval_samples_per_second": 856.355, "eval_steps_per_second": 13.421, "step": 238000 }, { "epoch": 2.66, "learning_rate": 0.0001368345879332647, "loss": 0.2406, "step": 238500 }, { "epoch": 2.67, "learning_rate": 0.0001367669436277944, "loss": 0.2403, "step": 239000 }, { "epoch": 2.67, "eval_loss": 0.2315613031387329, "eval_runtime": 2.609, "eval_samples_per_second": 880.43, "eval_steps_per_second": 13.799, "step": 239000 }, { "epoch": 2.67, "learning_rate": 0.00013669914412366783, "loss": 0.2404, "step": 239500 }, { "epoch": 2.68, "learning_rate": 0.0001366311896062463, "loss": 0.2403, "step": 240000 }, { "epoch": 2.68, "eval_loss": 0.2302522510290146, "eval_runtime": 2.7632, "eval_samples_per_second": 831.29, "eval_steps_per_second": 13.028, "step": 240000 }, { "epoch": 2.68, "learning_rate": 0.00013656308026131485, "loss": 0.2403, "step": 240500 }, { "epoch": 2.69, "learning_rate": 0.00013649481627508181, "loss": 0.2401, "step": 241000 }, { "epoch": 2.69, "eval_loss": 0.23056615889072418, "eval_runtime": 2.5909, "eval_samples_per_second": 886.57, "eval_steps_per_second": 13.895, "step": 241000 }, { "epoch": 2.69, "learning_rate": 0.00013642639783417832, "loss": 0.2401, "step": 241500 }, { "epoch": 2.7, "learning_rate": 0.0001363578251256578, "loss": 0.2394, "step": 242000 }, { "epoch": 2.7, "eval_loss": 0.23005209863185883, "eval_runtime": 2.6861, "eval_samples_per_second": 855.147, "eval_steps_per_second": 13.402, "step": 242000 }, { "epoch": 2.7, "learning_rate": 0.0001362890983369954, "loss": 0.2395, "step": 242500 }, { "epoch": 2.71, "learning_rate": 0.00013622021765608754, "loss": 0.2399, "step": 243000 }, { "epoch": 2.71, "eval_loss": 0.22968289256095886, "eval_runtime": 2.6547, "eval_samples_per_second": 865.265, "eval_steps_per_second": 13.561, "step": 243000 }, { "epoch": 2.72, "learning_rate": 0.00013615118327125136, "loss": 0.2395, "step": 243500 }, { "epoch": 2.72, "learning_rate": 0.00013608199537122425, "loss": 0.2389, "step": 244000 }, { "epoch": 2.72, "eval_loss": 0.23073868453502655, "eval_runtime": 2.6945, "eval_samples_per_second": 852.485, "eval_steps_per_second": 13.361, "step": 244000 }, { "epoch": 2.73, "learning_rate": 0.0001360126541451633, "loss": 0.2394, "step": 244500 }, { "epoch": 2.73, "learning_rate": 0.0001359431597826447, "loss": 0.239, "step": 245000 }, { "epoch": 2.73, "eval_loss": 0.22961042821407318, "eval_runtime": 2.5955, "eval_samples_per_second": 885.008, "eval_steps_per_second": 13.87, "step": 245000 }, { "epoch": 2.74, "learning_rate": 0.0001358735124736635, "loss": 0.24, "step": 245500 }, { "epoch": 2.74, "learning_rate": 0.0001358037124086327, "loss": 0.2393, "step": 246000 }, { "epoch": 2.74, "eval_loss": 0.23064970970153809, "eval_runtime": 2.6363, "eval_samples_per_second": 871.309, "eval_steps_per_second": 13.656, "step": 246000 }, { "epoch": 2.75, "learning_rate": 0.00013573375977838303, "loss": 0.2389, "step": 246500 }, { "epoch": 2.75, "learning_rate": 0.00013566365477416233, "loss": 0.2386, "step": 247000 }, { "epoch": 2.75, "eval_loss": 0.22737060487270355, "eval_runtime": 2.5775, "eval_samples_per_second": 891.16, "eval_steps_per_second": 13.967, "step": 247000 }, { "epoch": 2.76, "learning_rate": 0.00013559339758763495, "loss": 0.239, "step": 247500 }, { "epoch": 2.77, "learning_rate": 0.00013552298841088144, "loss": 0.2389, "step": 248000 }, { "epoch": 2.77, "eval_loss": 0.22698281705379486, "eval_runtime": 2.6869, "eval_samples_per_second": 854.901, "eval_steps_per_second": 13.399, "step": 248000 }, { "epoch": 2.77, "learning_rate": 0.00013545242743639774, "loss": 0.2387, "step": 248500 }, { "epoch": 2.78, "learning_rate": 0.00013538171485709486, "loss": 0.2382, "step": 249000 }, { "epoch": 2.78, "eval_loss": 0.22949343919754028, "eval_runtime": 2.6923, "eval_samples_per_second": 853.185, "eval_steps_per_second": 13.372, "step": 249000 }, { "epoch": 2.78, "learning_rate": 0.00013531085086629832, "loss": 0.2384, "step": 249500 }, { "epoch": 2.79, "learning_rate": 0.00013523983565774753, "loss": 0.2384, "step": 250000 }, { "epoch": 2.79, "eval_loss": 0.22683538496494293, "eval_runtime": 2.6574, "eval_samples_per_second": 864.374, "eval_steps_per_second": 13.547, "step": 250000 }, { "epoch": 2.79, "learning_rate": 0.0001351686694255954, "loss": 0.2385, "step": 250500 }, { "epoch": 2.8, "learning_rate": 0.00013509735236440766, "loss": 0.2379, "step": 251000 }, { "epoch": 2.8, "eval_loss": 0.2289341241121292, "eval_runtime": 2.5642, "eval_samples_per_second": 895.794, "eval_steps_per_second": 14.039, "step": 251000 }, { "epoch": 2.8, "learning_rate": 0.0001350258846691625, "loss": 0.238, "step": 251500 }, { "epoch": 2.81, "learning_rate": 0.00013495426653524972, "loss": 0.2381, "step": 252000 }, { "epoch": 2.81, "eval_loss": 0.22911319136619568, "eval_runtime": 2.6235, "eval_samples_per_second": 875.54, "eval_steps_per_second": 13.722, "step": 252000 }, { "epoch": 2.82, "learning_rate": 0.0001348824981584707, "loss": 0.2383, "step": 252500 }, { "epoch": 2.82, "learning_rate": 0.00013481057973503742, "loss": 0.2378, "step": 253000 }, { "epoch": 2.82, "eval_loss": 0.22786130011081696, "eval_runtime": 2.5806, "eval_samples_per_second": 890.1, "eval_steps_per_second": 13.95, "step": 253000 }, { "epoch": 2.83, "learning_rate": 0.00013473851146157204, "loss": 0.2382, "step": 253500 }, { "epoch": 2.83, "learning_rate": 0.00013466629353510651, "loss": 0.2376, "step": 254000 }, { "epoch": 2.83, "eval_loss": 0.2276422381401062, "eval_runtime": 2.6007, "eval_samples_per_second": 883.226, "eval_steps_per_second": 13.842, "step": 254000 }, { "epoch": 2.84, "learning_rate": 0.00013459392615308192, "loss": 0.2382, "step": 254500 }, { "epoch": 2.84, "learning_rate": 0.00013452140951334787, "loss": 0.2373, "step": 255000 }, { "epoch": 2.84, "eval_loss": 0.22781763970851898, "eval_runtime": 2.5831, "eval_samples_per_second": 889.242, "eval_steps_per_second": 13.937, "step": 255000 }, { "epoch": 2.85, "learning_rate": 0.00013444874381416208, "loss": 0.2371, "step": 255500 }, { "epoch": 2.86, "learning_rate": 0.00013437592925418985, "loss": 0.2374, "step": 256000 }, { "epoch": 2.86, "eval_loss": 0.22769607603549957, "eval_runtime": 2.6139, "eval_samples_per_second": 878.768, "eval_steps_per_second": 13.773, "step": 256000 }, { "epoch": 2.86, "learning_rate": 0.00013430296603250338, "loss": 0.2375, "step": 256500 }, { "epoch": 2.87, "learning_rate": 0.00013422985434858133, "loss": 0.2372, "step": 257000 }, { "epoch": 2.87, "eval_loss": 0.22923217713832855, "eval_runtime": 2.5056, "eval_samples_per_second": 916.749, "eval_steps_per_second": 14.368, "step": 257000 }, { "epoch": 2.87, "learning_rate": 0.00013415659440230824, "loss": 0.2375, "step": 257500 }, { "epoch": 2.88, "learning_rate": 0.00013408318639397405, "loss": 0.2364, "step": 258000 }, { "epoch": 2.88, "eval_loss": 0.2288464456796646, "eval_runtime": 2.5629, "eval_samples_per_second": 896.264, "eval_steps_per_second": 14.047, "step": 258000 }, { "epoch": 2.88, "learning_rate": 0.00013400963052427337, "loss": 0.237, "step": 258500 }, { "epoch": 2.89, "learning_rate": 0.00013393592699430525, "loss": 0.2371, "step": 259000 }, { "epoch": 2.89, "eval_loss": 0.22805295884609222, "eval_runtime": 2.5516, "eval_samples_per_second": 900.222, "eval_steps_per_second": 14.109, "step": 259000 }, { "epoch": 2.89, "learning_rate": 0.00013386207600557222, "loss": 0.2363, "step": 259500 }, { "epoch": 2.9, "learning_rate": 0.00013378807775998012, "loss": 0.237, "step": 260000 }, { "epoch": 2.9, "eval_loss": 0.22537867724895477, "eval_runtime": 2.54, "eval_samples_per_second": 904.347, "eval_steps_per_second": 14.173, "step": 260000 }, { "epoch": 2.91, "learning_rate": 0.0001337139324598373, "loss": 0.2369, "step": 260500 }, { "epoch": 2.91, "learning_rate": 0.00013363964030785422, "loss": 0.2366, "step": 261000 }, { "epoch": 2.91, "eval_loss": 0.22471630573272705, "eval_runtime": 2.4998, "eval_samples_per_second": 918.881, "eval_steps_per_second": 14.401, "step": 261000 }, { "epoch": 2.92, "learning_rate": 0.00013356520150714277, "loss": 0.2368, "step": 261500 }, { "epoch": 2.92, "learning_rate": 0.00013349061626121578, "loss": 0.2366, "step": 262000 }, { "epoch": 2.92, "eval_loss": 0.22358164191246033, "eval_runtime": 2.454, "eval_samples_per_second": 936.011, "eval_steps_per_second": 14.67, "step": 262000 }, { "epoch": 2.93, "learning_rate": 0.00013341588477398645, "loss": 0.2365, "step": 262500 }, { "epoch": 2.93, "learning_rate": 0.00013334100724976783, "loss": 0.2358, "step": 263000 }, { "epoch": 2.93, "eval_loss": 0.22610020637512207, "eval_runtime": 2.4695, "eval_samples_per_second": 930.133, "eval_steps_per_second": 14.578, "step": 263000 }, { "epoch": 2.94, "learning_rate": 0.00013326598389327223, "loss": 0.2368, "step": 263500 }, { "epoch": 2.94, "learning_rate": 0.0001331908149096106, "loss": 0.2361, "step": 264000 }, { "epoch": 2.94, "eval_loss": 0.22605933248996735, "eval_runtime": 2.4948, "eval_samples_per_second": 920.701, "eval_steps_per_second": 14.43, "step": 264000 }, { "epoch": 2.95, "learning_rate": 0.0001331155005042921, "loss": 0.2358, "step": 264500 }, { "epoch": 2.96, "learning_rate": 0.00013304004088322342, "loss": 0.2361, "step": 265000 }, { "epoch": 2.96, "eval_loss": 0.22790436446666718, "eval_runtime": 2.407, "eval_samples_per_second": 954.282, "eval_steps_per_second": 14.956, "step": 265000 }, { "epoch": 2.96, "learning_rate": 0.00013296443625270828, "loss": 0.2363, "step": 265500 }, { "epoch": 2.97, "learning_rate": 0.00013288868681944692, "loss": 0.236, "step": 266000 }, { "epoch": 2.97, "eval_loss": 0.22779372334480286, "eval_runtime": 2.5112, "eval_samples_per_second": 914.702, "eval_steps_per_second": 14.336, "step": 266000 }, { "epoch": 2.97, "learning_rate": 0.00013281279279053532, "loss": 0.2357, "step": 266500 }, { "epoch": 2.98, "learning_rate": 0.00013273675437346487, "loss": 0.2353, "step": 267000 }, { "epoch": 2.98, "eval_loss": 0.22502335906028748, "eval_runtime": 2.3705, "eval_samples_per_second": 968.999, "eval_steps_per_second": 15.187, "step": 267000 }, { "epoch": 2.98, "learning_rate": 0.00013266057177612172, "loss": 0.2356, "step": 267500 }, { "epoch": 2.99, "learning_rate": 0.00013258424520678618, "loss": 0.2357, "step": 268000 }, { "epoch": 2.99, "eval_loss": 0.2256162017583847, "eval_runtime": 15.618, "eval_samples_per_second": 147.074, "eval_steps_per_second": 2.305, "step": 268000 }, { "epoch": 2.99, "learning_rate": 0.00013250777487413217, "loss": 0.2355, "step": 268500 }, { "epoch": 3.0, "learning_rate": 0.00013243116098722663, "loss": 0.2357, "step": 269000 }, { "epoch": 3.0, "eval_loss": 0.2276550680398941, "eval_runtime": 2.6194, "eval_samples_per_second": 876.914, "eval_steps_per_second": 13.744, "step": 269000 }, { "epoch": 3.01, "learning_rate": 0.00013235440375552906, "loss": 0.2354, "step": 269500 }, { "epoch": 3.01, "learning_rate": 0.00013227750338889077, "loss": 0.2354, "step": 270000 }, { "epoch": 3.01, "eval_loss": 0.22490766644477844, "eval_runtime": 2.524, "eval_samples_per_second": 910.07, "eval_steps_per_second": 14.263, "step": 270000 }, { "epoch": 3.02, "learning_rate": 0.0001322004600975544, "loss": 0.2351, "step": 270500 }, { "epoch": 3.02, "learning_rate": 0.00013212327409215343, "loss": 0.2349, "step": 271000 }, { "epoch": 3.02, "eval_loss": 0.2255261242389679, "eval_runtime": 2.5651, "eval_samples_per_second": 895.473, "eval_steps_per_second": 14.034, "step": 271000 }, { "epoch": 3.03, "learning_rate": 0.0001320459455837114, "loss": 0.2348, "step": 271500 }, { "epoch": 3.03, "learning_rate": 0.0001319684747836415, "loss": 0.2348, "step": 272000 }, { "epoch": 3.03, "eval_loss": 0.2229447066783905, "eval_runtime": 2.6131, "eval_samples_per_second": 879.025, "eval_steps_per_second": 13.777, "step": 272000 }, { "epoch": 3.04, "learning_rate": 0.00013189086190374595, "loss": 0.2348, "step": 272500 }, { "epoch": 3.04, "learning_rate": 0.0001318131071562154, "loss": 0.2345, "step": 273000 }, { "epoch": 3.04, "eval_loss": 0.22524061799049377, "eval_runtime": 2.5512, "eval_samples_per_second": 900.358, "eval_steps_per_second": 14.111, "step": 273000 }, { "epoch": 3.05, "learning_rate": 0.0001317352107536284, "loss": 0.2345, "step": 273500 }, { "epoch": 3.06, "learning_rate": 0.00013165717290895067, "loss": 0.2344, "step": 274000 }, { "epoch": 3.06, "eval_loss": 0.22475993633270264, "eval_runtime": 2.529, "eval_samples_per_second": 908.276, "eval_steps_per_second": 14.235, "step": 274000 }, { "epoch": 3.06, "learning_rate": 0.00013157899383553474, "loss": 0.2344, "step": 274500 }, { "epoch": 3.07, "learning_rate": 0.0001315006737471192, "loss": 0.2343, "step": 275000 }, { "epoch": 3.07, "eval_loss": 0.2223171889781952, "eval_runtime": 2.571, "eval_samples_per_second": 893.41, "eval_steps_per_second": 14.002, "step": 275000 }, { "epoch": 3.07, "learning_rate": 0.0001314222128578282, "loss": 0.2342, "step": 275500 }, { "epoch": 3.08, "learning_rate": 0.0001313436113821708, "loss": 0.2344, "step": 276000 }, { "epoch": 3.08, "eval_loss": 0.22429585456848145, "eval_runtime": 2.7431, "eval_samples_per_second": 837.378, "eval_steps_per_second": 13.124, "step": 276000 }, { "epoch": 3.08, "learning_rate": 0.0001312648695350404, "loss": 0.2344, "step": 276500 }, { "epoch": 3.09, "learning_rate": 0.00013118598753171425, "loss": 0.2343, "step": 277000 }, { "epoch": 3.09, "eval_loss": 0.22134767472743988, "eval_runtime": 2.5289, "eval_samples_per_second": 908.315, "eval_steps_per_second": 14.236, "step": 277000 }, { "epoch": 3.09, "learning_rate": 0.00013110696558785273, "loss": 0.2337, "step": 277500 }, { "epoch": 3.1, "learning_rate": 0.0001310278039194988, "loss": 0.2335, "step": 278000 }, { "epoch": 3.1, "eval_loss": 0.22428634762763977, "eval_runtime": 2.646, "eval_samples_per_second": 868.114, "eval_steps_per_second": 13.606, "step": 278000 }, { "epoch": 3.11, "learning_rate": 0.00013094850274307745, "loss": 0.234, "step": 278500 }, { "epoch": 3.11, "learning_rate": 0.00013086906227539506, "loss": 0.2336, "step": 279000 }, { "epoch": 3.11, "eval_loss": 0.22171786427497864, "eval_runtime": 4.1933, "eval_samples_per_second": 547.781, "eval_steps_per_second": 8.585, "step": 279000 }, { "epoch": 3.12, "learning_rate": 0.00013078948273363884, "loss": 0.2336, "step": 279500 }, { "epoch": 3.12, "learning_rate": 0.00013070976433537623, "loss": 0.2339, "step": 280000 }, { "epoch": 3.12, "eval_loss": 0.22290877997875214, "eval_runtime": 2.542, "eval_samples_per_second": 903.625, "eval_steps_per_second": 14.162, "step": 280000 }, { "epoch": 3.13, "learning_rate": 0.00013062990729855427, "loss": 0.2338, "step": 280500 }, { "epoch": 3.13, "learning_rate": 0.00013054991184149905, "loss": 0.2337, "step": 281000 }, { "epoch": 3.13, "eval_loss": 0.2214738130569458, "eval_runtime": 2.5831, "eval_samples_per_second": 889.227, "eval_steps_per_second": 13.937, "step": 281000 }, { "epoch": 3.14, "learning_rate": 0.00013046977818291508, "loss": 0.2337, "step": 281500 }, { "epoch": 3.15, "learning_rate": 0.00013038950654188476, "loss": 0.2334, "step": 282000 }, { "epoch": 3.15, "eval_loss": 0.2210046350955963, "eval_runtime": 2.5814, "eval_samples_per_second": 889.83, "eval_steps_per_second": 13.946, "step": 282000 }, { "epoch": 3.15, "learning_rate": 0.00013030909713786768, "loss": 0.2334, "step": 282500 }, { "epoch": 3.16, "learning_rate": 0.00013022855019070005, "loss": 0.2335, "step": 283000 }, { "epoch": 3.16, "eval_loss": 0.22165259718894958, "eval_runtime": 2.6243, "eval_samples_per_second": 875.295, "eval_steps_per_second": 13.718, "step": 283000 }, { "epoch": 3.16, "learning_rate": 0.00013014786592059418, "loss": 0.2331, "step": 283500 }, { "epoch": 3.17, "learning_rate": 0.0001300670445481378, "loss": 0.2328, "step": 284000 }, { "epoch": 3.17, "eval_loss": 0.2222437709569931, "eval_runtime": 2.5682, "eval_samples_per_second": 894.402, "eval_steps_per_second": 14.018, "step": 284000 }, { "epoch": 3.17, "learning_rate": 0.0001299860862942934, "loss": 0.2328, "step": 284500 }, { "epoch": 3.18, "learning_rate": 0.0001299049913803978, "loss": 0.2326, "step": 285000 }, { "epoch": 3.18, "eval_loss": 0.22183333337306976, "eval_runtime": 2.553, "eval_samples_per_second": 899.722, "eval_steps_per_second": 14.101, "step": 285000 }, { "epoch": 3.18, "learning_rate": 0.00012982376002816138, "loss": 0.2328, "step": 285500 }, { "epoch": 3.19, "learning_rate": 0.00012974239245966754, "loss": 0.2332, "step": 286000 }, { "epoch": 3.19, "eval_loss": 0.22312231361865997, "eval_runtime": 2.5852, "eval_samples_per_second": 888.513, "eval_steps_per_second": 13.925, "step": 286000 }, { "epoch": 3.2, "learning_rate": 0.00012966088889737216, "loss": 0.233, "step": 286500 }, { "epoch": 3.2, "learning_rate": 0.0001295792495641028, "loss": 0.2325, "step": 287000 }, { "epoch": 3.2, "eval_loss": 0.22235532104969025, "eval_runtime": 2.6014, "eval_samples_per_second": 882.973, "eval_steps_per_second": 13.838, "step": 287000 }, { "epoch": 3.21, "learning_rate": 0.0001294974746830583, "loss": 0.2325, "step": 287500 }, { "epoch": 3.21, "learning_rate": 0.00012941556447780813, "loss": 0.2326, "step": 288000 }, { "epoch": 3.21, "eval_loss": 0.22186285257339478, "eval_runtime": 2.5183, "eval_samples_per_second": 912.138, "eval_steps_per_second": 14.296, "step": 288000 }, { "epoch": 3.22, "learning_rate": 0.00012933351917229156, "loss": 0.2324, "step": 288500 }, { "epoch": 3.22, "learning_rate": 0.0001292513389908174, "loss": 0.2323, "step": 289000 }, { "epoch": 3.22, "eval_loss": 0.22211854159832, "eval_runtime": 2.5459, "eval_samples_per_second": 902.227, "eval_steps_per_second": 14.14, "step": 289000 }, { "epoch": 3.23, "learning_rate": 0.00012916902415806305, "loss": 0.2322, "step": 289500 }, { "epoch": 3.23, "learning_rate": 0.0001290865748990742, "loss": 0.2321, "step": 290000 }, { "epoch": 3.23, "eval_loss": 0.22493046522140503, "eval_runtime": 2.5418, "eval_samples_per_second": 903.676, "eval_steps_per_second": 14.163, "step": 290000 }, { "epoch": 3.24, "learning_rate": 0.00012900399143926395, "loss": 0.2322, "step": 290500 }, { "epoch": 3.25, "learning_rate": 0.00012892127400441228, "loss": 0.232, "step": 291000 }, { "epoch": 3.25, "eval_loss": 0.22251969575881958, "eval_runtime": 2.6362, "eval_samples_per_second": 871.331, "eval_steps_per_second": 13.656, "step": 291000 }, { "epoch": 3.25, "learning_rate": 0.00012883842282066557, "loss": 0.2322, "step": 291500 }, { "epoch": 3.26, "learning_rate": 0.00012875543811453576, "loss": 0.2321, "step": 292000 }, { "epoch": 3.26, "eval_loss": 0.22058303654193878, "eval_runtime": 2.601, "eval_samples_per_second": 883.125, "eval_steps_per_second": 13.841, "step": 292000 }, { "epoch": 3.26, "learning_rate": 0.00012867232011289984, "loss": 0.2315, "step": 292500 }, { "epoch": 3.27, "learning_rate": 0.0001285890690429993, "loss": 0.2318, "step": 293000 }, { "epoch": 3.27, "eval_loss": 0.22088617086410522, "eval_runtime": 2.572, "eval_samples_per_second": 893.074, "eval_steps_per_second": 13.997, "step": 293000 }, { "epoch": 3.27, "learning_rate": 0.00012850568513243934, "loss": 0.2318, "step": 293500 }, { "epoch": 3.28, "learning_rate": 0.00012842216860918846, "loss": 0.2318, "step": 294000 }, { "epoch": 3.28, "eval_loss": 0.2187003344297409, "eval_runtime": 2.5477, "eval_samples_per_second": 901.582, "eval_steps_per_second": 14.13, "step": 294000 }, { "epoch": 3.28, "learning_rate": 0.00012833851970157757, "loss": 0.2316, "step": 294500 }, { "epoch": 3.29, "learning_rate": 0.0001282547386382996, "loss": 0.2314, "step": 295000 }, { "epoch": 3.29, "eval_loss": 0.22022201120853424, "eval_runtime": 2.585, "eval_samples_per_second": 888.583, "eval_steps_per_second": 13.926, "step": 295000 }, { "epoch": 3.3, "learning_rate": 0.00012817082564840881, "loss": 0.2315, "step": 295500 }, { "epoch": 3.3, "learning_rate": 0.0001280867809613201, "loss": 0.2314, "step": 296000 }, { "epoch": 3.3, "eval_loss": 0.22136667370796204, "eval_runtime": 2.501, "eval_samples_per_second": 918.422, "eval_steps_per_second": 14.394, "step": 296000 }, { "epoch": 3.31, "learning_rate": 0.00012800260480680845, "loss": 0.2316, "step": 296500 }, { "epoch": 3.31, "learning_rate": 0.0001279182974150082, "loss": 0.2313, "step": 297000 }, { "epoch": 3.31, "eval_loss": 0.21788428723812103, "eval_runtime": 2.5879, "eval_samples_per_second": 887.603, "eval_steps_per_second": 13.911, "step": 297000 }, { "epoch": 3.32, "learning_rate": 0.00012783385901641258, "loss": 0.2312, "step": 297500 }, { "epoch": 3.32, "learning_rate": 0.00012774928984187297, "loss": 0.2309, "step": 298000 }, { "epoch": 3.32, "eval_loss": 0.22021803259849548, "eval_runtime": 2.7305, "eval_samples_per_second": 841.227, "eval_steps_per_second": 13.184, "step": 298000 }, { "epoch": 3.33, "learning_rate": 0.00012766459012259818, "loss": 0.2316, "step": 298500 }, { "epoch": 3.33, "learning_rate": 0.00012757976009015413, "loss": 0.2309, "step": 299000 }, { "epoch": 3.33, "eval_loss": 0.2220044583082199, "eval_runtime": 2.5329, "eval_samples_per_second": 906.883, "eval_steps_per_second": 14.213, "step": 299000 }, { "epoch": 3.34, "learning_rate": 0.00012749479997646275, "loss": 0.2307, "step": 299500 }, { "epoch": 3.35, "learning_rate": 0.0001274097100138019, "loss": 0.2313, "step": 300000 }, { "epoch": 3.35, "eval_loss": 0.2212502360343933, "eval_runtime": 2.5517, "eval_samples_per_second": 900.187, "eval_steps_per_second": 14.108, "step": 300000 }, { "epoch": 3.35, "learning_rate": 0.00012732449043480413, "loss": 0.2303, "step": 300500 }, { "epoch": 3.36, "learning_rate": 0.00012723914147245663, "loss": 0.2309, "step": 301000 }, { "epoch": 3.36, "eval_loss": 0.22217054665088654, "eval_runtime": 2.6319, "eval_samples_per_second": 872.746, "eval_steps_per_second": 13.678, "step": 301000 }, { "epoch": 3.36, "learning_rate": 0.00012715366336010016, "loss": 0.2309, "step": 301500 }, { "epoch": 3.37, "learning_rate": 0.00012706805633142863, "loss": 0.2306, "step": 302000 }, { "epoch": 3.37, "eval_loss": 0.2169163078069687, "eval_runtime": 2.56, "eval_samples_per_second": 897.252, "eval_steps_per_second": 14.062, "step": 302000 }, { "epoch": 3.37, "learning_rate": 0.00012698232062048837, "loss": 0.2306, "step": 302500 }, { "epoch": 3.38, "learning_rate": 0.00012689645646167755, "loss": 0.2302, "step": 303000 }, { "epoch": 3.38, "eval_loss": 0.21775402128696442, "eval_runtime": 2.5371, "eval_samples_per_second": 905.368, "eval_steps_per_second": 14.189, "step": 303000 }, { "epoch": 3.38, "learning_rate": 0.0001268104640897455, "loss": 0.2303, "step": 303500 }, { "epoch": 3.39, "learning_rate": 0.00012672434373979207, "loss": 0.2299, "step": 304000 }, { "epoch": 3.39, "eval_loss": 0.22127319872379303, "eval_runtime": 2.5832, "eval_samples_per_second": 889.194, "eval_steps_per_second": 13.936, "step": 304000 }, { "epoch": 3.4, "learning_rate": 0.00012663809564726706, "loss": 0.2302, "step": 304500 }, { "epoch": 3.4, "learning_rate": 0.00012655172004796936, "loss": 0.2304, "step": 305000 }, { "epoch": 3.4, "eval_loss": 0.21901114284992218, "eval_runtime": 2.5923, "eval_samples_per_second": 886.081, "eval_steps_per_second": 13.887, "step": 305000 }, { "epoch": 3.41, "learning_rate": 0.00012646521717804668, "loss": 0.2301, "step": 305500 }, { "epoch": 3.41, "learning_rate": 0.00012637858727399448, "loss": 0.2301, "step": 306000 }, { "epoch": 3.41, "eval_loss": 0.21952645480632782, "eval_runtime": 2.5862, "eval_samples_per_second": 888.187, "eval_steps_per_second": 13.92, "step": 306000 }, { "epoch": 3.42, "learning_rate": 0.00012629183057265563, "loss": 0.2301, "step": 306500 }, { "epoch": 3.42, "learning_rate": 0.00012620494731121966, "loss": 0.2297, "step": 307000 }, { "epoch": 3.42, "eval_loss": 0.2185007482767105, "eval_runtime": 2.5465, "eval_samples_per_second": 902.023, "eval_steps_per_second": 14.137, "step": 307000 }, { "epoch": 3.43, "learning_rate": 0.00012611793772722204, "loss": 0.2299, "step": 307500 }, { "epoch": 3.44, "learning_rate": 0.00012603080205854372, "loss": 0.2297, "step": 308000 }, { "epoch": 3.44, "eval_loss": 0.21959775686264038, "eval_runtime": 2.6732, "eval_samples_per_second": 859.262, "eval_steps_per_second": 13.467, "step": 308000 }, { "epoch": 3.44, "learning_rate": 0.00012594354054341024, "loss": 0.2299, "step": 308500 }, { "epoch": 3.45, "learning_rate": 0.00012585615342039126, "loss": 0.2299, "step": 309000 }, { "epoch": 3.45, "eval_loss": 0.21925170719623566, "eval_runtime": 2.6993, "eval_samples_per_second": 850.968, "eval_steps_per_second": 13.337, "step": 309000 }, { "epoch": 3.45, "learning_rate": 0.00012576864092839985, "loss": 0.2295, "step": 309500 }, { "epoch": 3.46, "learning_rate": 0.0001256810033066918, "loss": 0.2293, "step": 310000 }, { "epoch": 3.46, "eval_loss": 0.21877124905586243, "eval_runtime": 2.5234, "eval_samples_per_second": 910.263, "eval_steps_per_second": 14.266, "step": 310000 }, { "epoch": 3.46, "learning_rate": 0.00012559324079486505, "loss": 0.2299, "step": 310500 }, { "epoch": 3.47, "learning_rate": 0.0001255053536328589, "loss": 0.2294, "step": 311000 }, { "epoch": 3.47, "eval_loss": 0.2199290245771408, "eval_runtime": 2.5325, "eval_samples_per_second": 907.016, "eval_steps_per_second": 14.215, "step": 311000 }, { "epoch": 3.47, "learning_rate": 0.0001254173420609536, "loss": 0.2293, "step": 311500 }, { "epoch": 3.48, "learning_rate": 0.0001253292063197693, "loss": 0.2294, "step": 312000 }, { "epoch": 3.48, "eval_loss": 0.2200849950313568, "eval_runtime": 2.5639, "eval_samples_per_second": 895.889, "eval_steps_per_second": 14.041, "step": 312000 }, { "epoch": 3.49, "learning_rate": 0.00012524094665026584, "loss": 0.229, "step": 312500 }, { "epoch": 3.49, "learning_rate": 0.0001251525632937418, "loss": 0.2292, "step": 313000 }, { "epoch": 3.49, "eval_loss": 0.21636785566806793, "eval_runtime": 2.5639, "eval_samples_per_second": 895.902, "eval_steps_per_second": 14.041, "step": 313000 }, { "epoch": 3.5, "learning_rate": 0.0001250640564918338, "loss": 0.229, "step": 313500 }, { "epoch": 3.5, "learning_rate": 0.00012497542648651615, "loss": 0.229, "step": 314000 }, { "epoch": 3.5, "eval_loss": 0.21922779083251953, "eval_runtime": 2.5515, "eval_samples_per_second": 900.238, "eval_steps_per_second": 14.109, "step": 314000 }, { "epoch": 3.51, "learning_rate": 0.00012488667352009985, "loss": 0.229, "step": 314500 }, { "epoch": 3.51, "learning_rate": 0.00012479779783523216, "loss": 0.2287, "step": 315000 }, { "epoch": 3.51, "eval_loss": 0.21868407726287842, "eval_runtime": 2.5974, "eval_samples_per_second": 884.334, "eval_steps_per_second": 13.86, "step": 315000 }, { "epoch": 3.52, "learning_rate": 0.00012470879967489579, "loss": 0.2291, "step": 315500 }, { "epoch": 3.52, "learning_rate": 0.00012461967928240828, "loss": 0.2291, "step": 316000 }, { "epoch": 3.52, "eval_loss": 0.22106607258319855, "eval_runtime": 2.4653, "eval_samples_per_second": 931.733, "eval_steps_per_second": 14.603, "step": 316000 }, { "epoch": 3.53, "learning_rate": 0.00012453043690142143, "loss": 0.2285, "step": 316500 }, { "epoch": 3.54, "learning_rate": 0.00012444107277592047, "loss": 0.2285, "step": 317000 }, { "epoch": 3.54, "eval_loss": 0.21759365499019623, "eval_runtime": 2.6088, "eval_samples_per_second": 880.492, "eval_steps_per_second": 13.8, "step": 317000 }, { "epoch": 3.54, "learning_rate": 0.00012435158715022352, "loss": 0.2289, "step": 317500 }, { "epoch": 3.55, "learning_rate": 0.0001242619802689809, "loss": 0.2283, "step": 318000 }, { "epoch": 3.55, "eval_loss": 0.21786309778690338, "eval_runtime": 2.5783, "eval_samples_per_second": 890.904, "eval_steps_per_second": 13.963, "step": 318000 }, { "epoch": 3.55, "learning_rate": 0.00012417225237717434, "loss": 0.2287, "step": 318500 }, { "epoch": 3.56, "learning_rate": 0.00012408240372011647, "loss": 0.2282, "step": 319000 }, { "epoch": 3.56, "eval_loss": 0.2200136035680771, "eval_runtime": 2.6445, "eval_samples_per_second": 868.601, "eval_steps_per_second": 13.613, "step": 319000 }, { "epoch": 3.56, "learning_rate": 0.00012399243454345012, "loss": 0.2286, "step": 319500 }, { "epoch": 3.57, "learning_rate": 0.0001239023450931476, "loss": 0.2281, "step": 320000 }, { "epoch": 3.57, "eval_loss": 0.21773354709148407, "eval_runtime": 2.5181, "eval_samples_per_second": 912.179, "eval_steps_per_second": 14.296, "step": 320000 }, { "epoch": 3.57, "learning_rate": 0.00012381213561550995, "loss": 0.228, "step": 320500 }, { "epoch": 3.58, "learning_rate": 0.00012372180635716656, "loss": 0.2279, "step": 321000 }, { "epoch": 3.58, "eval_loss": 0.21474304795265198, "eval_runtime": 2.495, "eval_samples_per_second": 920.639, "eval_steps_per_second": 14.429, "step": 321000 }, { "epoch": 3.59, "learning_rate": 0.00012363135756507406, "loss": 0.2283, "step": 321500 }, { "epoch": 3.59, "learning_rate": 0.00012354078948651604, "loss": 0.228, "step": 322000 }, { "epoch": 3.59, "eval_loss": 0.21709482371807098, "eval_runtime": 2.5983, "eval_samples_per_second": 884.027, "eval_steps_per_second": 13.855, "step": 322000 }, { "epoch": 3.6, "learning_rate": 0.00012345010236910217, "loss": 0.2278, "step": 322500 }, { "epoch": 3.6, "learning_rate": 0.00012335929646076758, "loss": 0.2284, "step": 323000 }, { "epoch": 3.6, "eval_loss": 0.21743525564670563, "eval_runtime": 2.5879, "eval_samples_per_second": 887.578, "eval_steps_per_second": 13.911, "step": 323000 }, { "epoch": 3.61, "learning_rate": 0.0001232683720097721, "loss": 0.2278, "step": 323500 }, { "epoch": 3.61, "learning_rate": 0.00012317732926469976, "loss": 0.2282, "step": 324000 }, { "epoch": 3.61, "eval_loss": 0.2163660079240799, "eval_runtime": 2.509, "eval_samples_per_second": 915.491, "eval_steps_per_second": 14.348, "step": 324000 }, { "epoch": 3.62, "learning_rate": 0.00012308616847445794, "loss": 0.2278, "step": 324500 }, { "epoch": 3.62, "learning_rate": 0.00012299488988827675, "loss": 0.2278, "step": 325000 }, { "epoch": 3.62, "eval_loss": 0.21636153757572174, "eval_runtime": 2.5942, "eval_samples_per_second": 885.428, "eval_steps_per_second": 13.877, "step": 325000 }, { "epoch": 3.63, "learning_rate": 0.00012290349375570836, "loss": 0.2275, "step": 325500 }, { "epoch": 3.64, "learning_rate": 0.0001228119803266263, "loss": 0.2278, "step": 326000 }, { "epoch": 3.64, "eval_loss": 0.21803778409957886, "eval_runtime": 2.5627, "eval_samples_per_second": 896.316, "eval_steps_per_second": 14.048, "step": 326000 }, { "epoch": 3.64, "learning_rate": 0.0001227203498512248, "loss": 0.2276, "step": 326500 }, { "epoch": 3.65, "learning_rate": 0.0001226286025800181, "loss": 0.2276, "step": 327000 }, { "epoch": 3.65, "eval_loss": 0.21890942752361298, "eval_runtime": 2.5935, "eval_samples_per_second": 885.689, "eval_steps_per_second": 13.881, "step": 327000 }, { "epoch": 3.65, "learning_rate": 0.00012253673876383967, "loss": 0.2271, "step": 327500 }, { "epoch": 3.66, "learning_rate": 0.00012244475865384177, "loss": 0.2275, "step": 328000 }, { "epoch": 3.66, "eval_loss": 0.21888494491577148, "eval_runtime": 2.6221, "eval_samples_per_second": 876.02, "eval_steps_per_second": 13.73, "step": 328000 }, { "epoch": 3.66, "learning_rate": 0.00012235266250149444, "loss": 0.2273, "step": 328500 }, { "epoch": 3.67, "learning_rate": 0.00012226045055858505, "loss": 0.228, "step": 329000 }, { "epoch": 3.67, "eval_loss": 0.2192571461200714, "eval_runtime": 2.5089, "eval_samples_per_second": 915.552, "eval_steps_per_second": 14.349, "step": 329000 }, { "epoch": 3.67, "learning_rate": 0.00012216812307721755, "loss": 0.227, "step": 329500 }, { "epoch": 3.68, "learning_rate": 0.00012207568030981174, "loss": 0.2269, "step": 330000 }, { "epoch": 3.68, "eval_loss": 0.21729490160942078, "eval_runtime": 2.5927, "eval_samples_per_second": 885.965, "eval_steps_per_second": 13.885, "step": 330000 }, { "epoch": 3.69, "learning_rate": 0.00012198312250910265, "loss": 0.2271, "step": 330500 }, { "epoch": 3.69, "learning_rate": 0.00012189044992813972, "loss": 0.2271, "step": 331000 }, { "epoch": 3.69, "eval_loss": 0.21838127076625824, "eval_runtime": 2.5783, "eval_samples_per_second": 890.899, "eval_steps_per_second": 13.963, "step": 331000 }, { "epoch": 3.7, "learning_rate": 0.00012179766282028625, "loss": 0.2267, "step": 331500 }, { "epoch": 3.7, "learning_rate": 0.0001217047614392187, "loss": 0.2265, "step": 332000 }, { "epoch": 3.7, "eval_loss": 0.2152900993824005, "eval_runtime": 2.5995, "eval_samples_per_second": 883.629, "eval_steps_per_second": 13.849, "step": 332000 }, { "epoch": 3.71, "learning_rate": 0.00012161174603892584, "loss": 0.2271, "step": 332500 }, { "epoch": 3.71, "learning_rate": 0.00012151861687370828, "loss": 0.2269, "step": 333000 }, { "epoch": 3.71, "eval_loss": 0.21577809751033783, "eval_runtime": 2.6271, "eval_samples_per_second": 874.351, "eval_steps_per_second": 13.703, "step": 333000 }, { "epoch": 3.72, "learning_rate": 0.00012142537419817753, "loss": 0.2273, "step": 333500 }, { "epoch": 3.73, "learning_rate": 0.00012133201826725558, "loss": 0.2267, "step": 334000 }, { "epoch": 3.73, "eval_loss": 0.21679390966892242, "eval_runtime": 3.7147, "eval_samples_per_second": 618.35, "eval_steps_per_second": 9.691, "step": 334000 }, { "epoch": 3.73, "learning_rate": 0.00012123854933617394, "loss": 0.2264, "step": 334500 }, { "epoch": 3.74, "learning_rate": 0.0001211449676604731, "loss": 0.2269, "step": 335000 }, { "epoch": 3.74, "eval_loss": 0.21456079185009003, "eval_runtime": 2.5549, "eval_samples_per_second": 899.059, "eval_steps_per_second": 14.091, "step": 335000 }, { "epoch": 3.74, "learning_rate": 0.0001210512734960018, "loss": 0.2266, "step": 335500 }, { "epoch": 3.75, "learning_rate": 0.00012095746709891632, "loss": 0.2257, "step": 336000 }, { "epoch": 3.75, "eval_loss": 0.21742412447929382, "eval_runtime": 2.5117, "eval_samples_per_second": 914.52, "eval_steps_per_second": 14.333, "step": 336000 }, { "epoch": 3.75, "learning_rate": 0.00012086354872567969, "loss": 0.2269, "step": 336500 }, { "epoch": 3.76, "learning_rate": 0.00012076951863306127, "loss": 0.2263, "step": 337000 }, { "epoch": 3.76, "eval_loss": 0.21796566247940063, "eval_runtime": 2.5462, "eval_samples_per_second": 902.138, "eval_steps_per_second": 14.139, "step": 337000 }, { "epoch": 3.76, "learning_rate": 0.00012067537707813568, "loss": 0.226, "step": 337500 }, { "epoch": 3.77, "learning_rate": 0.0001205811243182823, "loss": 0.2263, "step": 338000 }, { "epoch": 3.77, "eval_loss": 0.21637655794620514, "eval_runtime": 2.5145, "eval_samples_per_second": 913.508, "eval_steps_per_second": 14.317, "step": 338000 }, { "epoch": 3.78, "learning_rate": 0.00012048676061118467, "loss": 0.226, "step": 338500 }, { "epoch": 3.78, "learning_rate": 0.00012039228621482949, "loss": 0.2262, "step": 339000 }, { "epoch": 3.78, "eval_loss": 0.21473273634910583, "eval_runtime": 2.5773, "eval_samples_per_second": 891.235, "eval_steps_per_second": 13.968, "step": 339000 }, { "epoch": 3.79, "learning_rate": 0.0001202977013875062, "loss": 0.2257, "step": 339500 }, { "epoch": 3.79, "learning_rate": 0.00012020300638780604, "loss": 0.2259, "step": 340000 }, { "epoch": 3.79, "eval_loss": 0.21654388308525085, "eval_runtime": 2.5215, "eval_samples_per_second": 910.974, "eval_steps_per_second": 14.277, "step": 340000 }, { "epoch": 3.8, "learning_rate": 0.0001201082014746216, "loss": 0.226, "step": 340500 }, { "epoch": 3.8, "learning_rate": 0.00012001328690714582, "loss": 0.2263, "step": 341000 }, { "epoch": 3.8, "eval_loss": 0.2129027545452118, "eval_runtime": 2.51, "eval_samples_per_second": 915.143, "eval_steps_per_second": 14.343, "step": 341000 }, { "epoch": 3.81, "learning_rate": 0.00011991826294487155, "loss": 0.2259, "step": 341500 }, { "epoch": 3.81, "learning_rate": 0.00011982312984759068, "loss": 0.2259, "step": 342000 }, { "epoch": 3.81, "eval_loss": 0.21433599293231964, "eval_runtime": 2.5873, "eval_samples_per_second": 887.783, "eval_steps_per_second": 13.914, "step": 342000 }, { "epoch": 3.82, "learning_rate": 0.00011972788787539345, "loss": 0.2254, "step": 342500 }, { "epoch": 3.83, "learning_rate": 0.00011963253728866778, "loss": 0.2254, "step": 343000 }, { "epoch": 3.83, "eval_loss": 0.21325699985027313, "eval_runtime": 2.551, "eval_samples_per_second": 900.437, "eval_steps_per_second": 14.112, "step": 343000 }, { "epoch": 3.83, "learning_rate": 0.00011953707834809848, "loss": 0.2257, "step": 343500 }, { "epoch": 3.84, "learning_rate": 0.00011944151131466675, "loss": 0.2259, "step": 344000 }, { "epoch": 3.84, "eval_loss": 0.2161191999912262, "eval_runtime": 2.6489, "eval_samples_per_second": 867.163, "eval_steps_per_second": 13.591, "step": 344000 }, { "epoch": 3.84, "learning_rate": 0.00011934583644964913, "loss": 0.2255, "step": 344500 }, { "epoch": 3.85, "learning_rate": 0.00011925005401461709, "loss": 0.2256, "step": 345000 }, { "epoch": 3.85, "eval_loss": 0.21498842537403107, "eval_runtime": 2.5418, "eval_samples_per_second": 903.695, "eval_steps_per_second": 14.163, "step": 345000 }, { "epoch": 3.85, "learning_rate": 0.00011915416427143613, "loss": 0.225, "step": 345500 }, { "epoch": 3.86, "learning_rate": 0.00011905816748226513, "loss": 0.2257, "step": 346000 }, { "epoch": 3.86, "eval_loss": 0.21445859968662262, "eval_runtime": 2.5612, "eval_samples_per_second": 896.853, "eval_steps_per_second": 14.056, "step": 346000 }, { "epoch": 3.86, "learning_rate": 0.00011896206390955567, "loss": 0.225, "step": 346500 }, { "epoch": 3.87, "learning_rate": 0.00011886585381605125, "loss": 0.2251, "step": 347000 }, { "epoch": 3.87, "eval_loss": 0.2150752693414688, "eval_runtime": 2.605, "eval_samples_per_second": 881.779, "eval_steps_per_second": 13.82, "step": 347000 }, { "epoch": 3.88, "learning_rate": 0.0001187695374647866, "loss": 0.2251, "step": 347500 }, { "epoch": 3.88, "learning_rate": 0.00011867311511908693, "loss": 0.2249, "step": 348000 }, { "epoch": 3.88, "eval_loss": 0.212222158908844, "eval_runtime": 2.552, "eval_samples_per_second": 900.074, "eval_steps_per_second": 14.107, "step": 348000 }, { "epoch": 3.89, "learning_rate": 0.00011857658704256721, "loss": 0.2249, "step": 348500 }, { "epoch": 3.89, "learning_rate": 0.00011847995349913162, "loss": 0.2252, "step": 349000 }, { "epoch": 3.89, "eval_loss": 0.21469330787658691, "eval_runtime": 2.6867, "eval_samples_per_second": 854.946, "eval_steps_per_second": 13.399, "step": 349000 }, { "epoch": 3.9, "learning_rate": 0.00011838321475297247, "loss": 0.2243, "step": 349500 }, { "epoch": 3.9, "learning_rate": 0.00011828637106856989, "loss": 0.2248, "step": 350000 }, { "epoch": 3.9, "eval_loss": 0.21383698284626007, "eval_runtime": 2.5687, "eval_samples_per_second": 894.235, "eval_steps_per_second": 14.015, "step": 350000 }, { "epoch": 3.91, "learning_rate": 0.00011818942271069073, "loss": 0.2252, "step": 350500 }, { "epoch": 3.91, "learning_rate": 0.00011809236994438816, "loss": 0.2245, "step": 351000 }, { "epoch": 3.91, "eval_loss": 0.21465420722961426, "eval_runtime": 2.6241, "eval_samples_per_second": 875.34, "eval_steps_per_second": 13.719, "step": 351000 }, { "epoch": 3.92, "learning_rate": 0.0001179952130350007, "loss": 0.2245, "step": 351500 }, { "epoch": 3.93, "learning_rate": 0.00011789795224815164, "loss": 0.2246, "step": 352000 }, { "epoch": 3.93, "eval_loss": 0.21523068845272064, "eval_runtime": 2.5097, "eval_samples_per_second": 915.238, "eval_steps_per_second": 14.344, "step": 352000 }, { "epoch": 3.93, "learning_rate": 0.00011780058784974831, "loss": 0.2247, "step": 352500 }, { "epoch": 3.94, "learning_rate": 0.00011770312010598116, "loss": 0.2243, "step": 353000 }, { "epoch": 3.94, "eval_loss": 0.21631552278995514, "eval_runtime": 2.5552, "eval_samples_per_second": 898.94, "eval_steps_per_second": 14.089, "step": 353000 }, { "epoch": 3.94, "learning_rate": 0.00011760554928332333, "loss": 0.2242, "step": 353500 }, { "epoch": 3.95, "learning_rate": 0.00011750787564852973, "loss": 0.2245, "step": 354000 }, { "epoch": 3.95, "eval_loss": 0.21260108053684235, "eval_runtime": 2.5458, "eval_samples_per_second": 902.259, "eval_steps_per_second": 14.141, "step": 354000 }, { "epoch": 3.95, "learning_rate": 0.00011741009946863639, "loss": 0.2242, "step": 354500 }, { "epoch": 3.96, "learning_rate": 0.00011731222101095955, "loss": 0.2239, "step": 355000 }, { "epoch": 3.96, "eval_loss": 0.2130313217639923, "eval_runtime": 2.5436, "eval_samples_per_second": 903.054, "eval_steps_per_second": 14.153, "step": 355000 }, { "epoch": 3.96, "learning_rate": 0.00011721424054309525, "loss": 0.2236, "step": 355500 }, { "epoch": 3.97, "learning_rate": 0.00011711615833291833, "loss": 0.2239, "step": 356000 }, { "epoch": 3.97, "eval_loss": 0.21391764283180237, "eval_runtime": 2.5894, "eval_samples_per_second": 887.095, "eval_steps_per_second": 13.903, "step": 356000 }, { "epoch": 3.98, "learning_rate": 0.00011701797464858178, "loss": 0.2243, "step": 356500 }, { "epoch": 3.98, "learning_rate": 0.0001169196897585161, "loss": 0.2241, "step": 357000 }, { "epoch": 3.98, "eval_loss": 0.2131282538175583, "eval_runtime": 2.6054, "eval_samples_per_second": 881.617, "eval_steps_per_second": 13.817, "step": 357000 }, { "epoch": 3.99, "learning_rate": 0.00011682130393142838, "loss": 0.2236, "step": 357500 }, { "epoch": 3.99, "learning_rate": 0.00011672281743630175, "loss": 0.2239, "step": 358000 }, { "epoch": 3.99, "eval_loss": 0.21421436965465546, "eval_runtime": 2.6143, "eval_samples_per_second": 878.642, "eval_steps_per_second": 13.771, "step": 358000 }, { "epoch": 4.0, "learning_rate": 0.00011662423054239445, "loss": 0.2254, "step": 358500 }, { "epoch": 4.0, "learning_rate": 0.0001165255435192394, "loss": 0.2237, "step": 359000 }, { "epoch": 4.0, "eval_loss": 0.21473369002342224, "eval_runtime": 2.5965, "eval_samples_per_second": 884.643, "eval_steps_per_second": 13.865, "step": 359000 }, { "epoch": 4.01, "learning_rate": 0.00011642675663664308, "loss": 0.2234, "step": 359500 }, { "epoch": 4.02, "learning_rate": 0.00011632787016468506, "loss": 0.2232, "step": 360000 }, { "epoch": 4.02, "eval_loss": 0.21288304030895233, "eval_runtime": 2.644, "eval_samples_per_second": 868.758, "eval_steps_per_second": 13.616, "step": 360000 }, { "epoch": 4.02, "learning_rate": 0.00011622888437371719, "loss": 0.2232, "step": 360500 }, { "epoch": 4.03, "learning_rate": 0.0001161297995343628, "loss": 0.2234, "step": 361000 }, { "epoch": 4.03, "eval_loss": 0.21240203082561493, "eval_runtime": 2.5699, "eval_samples_per_second": 893.801, "eval_steps_per_second": 14.008, "step": 361000 }, { "epoch": 4.03, "learning_rate": 0.00011603061591751615, "loss": 0.2238, "step": 361500 }, { "epoch": 4.04, "learning_rate": 0.00011593133379434138, "loss": 0.2238, "step": 362000 }, { "epoch": 4.04, "eval_loss": 0.2136167287826538, "eval_runtime": 2.4904, "eval_samples_per_second": 922.34, "eval_steps_per_second": 14.455, "step": 362000 }, { "epoch": 4.04, "learning_rate": 0.00011583195343627207, "loss": 0.2234, "step": 362500 }, { "epoch": 4.05, "learning_rate": 0.00011573247511501028, "loss": 0.2235, "step": 363000 }, { "epoch": 4.05, "eval_loss": 0.2138611227273941, "eval_runtime": 2.4582, "eval_samples_per_second": 934.423, "eval_steps_per_second": 14.645, "step": 363000 }, { "epoch": 4.05, "learning_rate": 0.00011563289910252599, "loss": 0.2229, "step": 363500 }, { "epoch": 4.06, "learning_rate": 0.00011553322567105619, "loss": 0.2229, "step": 364000 }, { "epoch": 4.06, "eval_loss": 0.21355026960372925, "eval_runtime": 2.5444, "eval_samples_per_second": 902.768, "eval_steps_per_second": 14.149, "step": 364000 }, { "epoch": 4.07, "learning_rate": 0.00011543345509310421, "loss": 0.223, "step": 364500 }, { "epoch": 4.07, "learning_rate": 0.00011533358764143905, "loss": 0.2225, "step": 365000 }, { "epoch": 4.07, "eval_loss": 0.21412673592567444, "eval_runtime": 2.6104, "eval_samples_per_second": 879.928, "eval_steps_per_second": 13.791, "step": 365000 }, { "epoch": 4.08, "learning_rate": 0.00011523362358909449, "loss": 0.223, "step": 365500 }, { "epoch": 4.08, "learning_rate": 0.00011513356320936841, "loss": 0.2231, "step": 366000 }, { "epoch": 4.08, "eval_loss": 0.21194761991500854, "eval_runtime": 2.4955, "eval_samples_per_second": 920.443, "eval_steps_per_second": 14.426, "step": 366000 }, { "epoch": 4.09, "learning_rate": 0.00011503340677582213, "loss": 0.223, "step": 366500 }, { "epoch": 4.09, "learning_rate": 0.00011493315456227943, "loss": 0.2231, "step": 367000 }, { "epoch": 4.09, "eval_loss": 0.21338239312171936, "eval_runtime": 2.5254, "eval_samples_per_second": 909.575, "eval_steps_per_second": 14.255, "step": 367000 }, { "epoch": 4.1, "learning_rate": 0.00011483280684282611, "loss": 0.2226, "step": 367500 }, { "epoch": 4.1, "learning_rate": 0.00011473236389180894, "loss": 0.2228, "step": 368000 }, { "epoch": 4.1, "eval_loss": 0.21372437477111816, "eval_runtime": 2.6109, "eval_samples_per_second": 879.784, "eval_steps_per_second": 13.789, "step": 368000 }, { "epoch": 4.11, "learning_rate": 0.00011463182598383516, "loss": 0.2226, "step": 368500 }, { "epoch": 4.12, "learning_rate": 0.00011453119339377154, "loss": 0.2226, "step": 369000 }, { "epoch": 4.12, "eval_loss": 0.2124049812555313, "eval_runtime": 2.4867, "eval_samples_per_second": 923.72, "eval_steps_per_second": 14.477, "step": 369000 }, { "epoch": 4.12, "learning_rate": 0.00011443046639674375, "loss": 0.2224, "step": 369500 }, { "epoch": 4.13, "learning_rate": 0.00011432964526813558, "loss": 0.2229, "step": 370000 }, { "epoch": 4.13, "eval_loss": 0.21308059990406036, "eval_runtime": 2.6387, "eval_samples_per_second": 870.49, "eval_steps_per_second": 13.643, "step": 370000 }, { "epoch": 4.13, "learning_rate": 0.00011422873028358807, "loss": 0.2223, "step": 370500 }, { "epoch": 4.14, "learning_rate": 0.00011412772171899904, "loss": 0.2224, "step": 371000 }, { "epoch": 4.14, "eval_loss": 0.212064728140831, "eval_runtime": 2.5187, "eval_samples_per_second": 911.981, "eval_steps_per_second": 14.293, "step": 371000 }, { "epoch": 4.14, "learning_rate": 0.00011402661985052197, "loss": 0.222, "step": 371500 }, { "epoch": 4.15, "learning_rate": 0.00011392542495456556, "loss": 0.222, "step": 372000 }, { "epoch": 4.15, "eval_loss": 0.21111257374286652, "eval_runtime": 2.5956, "eval_samples_per_second": 884.946, "eval_steps_per_second": 13.869, "step": 372000 }, { "epoch": 4.15, "learning_rate": 0.00011382413730779273, "loss": 0.222, "step": 372500 }, { "epoch": 4.16, "learning_rate": 0.00011372275718712006, "loss": 0.2223, "step": 373000 }, { "epoch": 4.16, "eval_loss": 0.21289856731891632, "eval_runtime": 2.6026, "eval_samples_per_second": 882.577, "eval_steps_per_second": 13.832, "step": 373000 }, { "epoch": 4.17, "learning_rate": 0.00011362128486971696, "loss": 0.2224, "step": 373500 }, { "epoch": 4.17, "learning_rate": 0.00011351972063300484, "loss": 0.2222, "step": 374000 }, { "epoch": 4.17, "eval_loss": 0.2087879329919815, "eval_runtime": 2.5302, "eval_samples_per_second": 907.82, "eval_steps_per_second": 14.228, "step": 374000 }, { "epoch": 4.18, "learning_rate": 0.0001134180647546565, "loss": 0.2218, "step": 374500 }, { "epoch": 4.18, "learning_rate": 0.00011331631751259515, "loss": 0.222, "step": 375000 }, { "epoch": 4.18, "eval_loss": 0.21286322176456451, "eval_runtime": 2.6262, "eval_samples_per_second": 874.641, "eval_steps_per_second": 13.708, "step": 375000 }, { "epoch": 4.19, "learning_rate": 0.00011321447918499391, "loss": 0.2218, "step": 375500 }, { "epoch": 4.19, "learning_rate": 0.00011311255005027487, "loss": 0.222, "step": 376000 }, { "epoch": 4.19, "eval_loss": 0.210297092795372, "eval_runtime": 2.5937, "eval_samples_per_second": 885.619, "eval_steps_per_second": 13.88, "step": 376000 }, { "epoch": 4.2, "learning_rate": 0.00011301053038710837, "loss": 0.2218, "step": 376500 }, { "epoch": 4.2, "learning_rate": 0.00011290842047441232, "loss": 0.222, "step": 377000 }, { "epoch": 4.2, "eval_loss": 0.21230094134807587, "eval_runtime": 2.5876, "eval_samples_per_second": 887.679, "eval_steps_per_second": 13.912, "step": 377000 }, { "epoch": 4.21, "learning_rate": 0.0001128062205913513, "loss": 0.2217, "step": 377500 }, { "epoch": 4.22, "learning_rate": 0.00011270393101733585, "loss": 0.222, "step": 378000 }, { "epoch": 4.22, "eval_loss": 0.2095794677734375, "eval_runtime": 2.6193, "eval_samples_per_second": 876.967, "eval_steps_per_second": 13.744, "step": 378000 }, { "epoch": 4.22, "learning_rate": 0.00011260155203202183, "loss": 0.2215, "step": 378500 }, { "epoch": 4.23, "learning_rate": 0.00011249908391530946, "loss": 0.2214, "step": 379000 }, { "epoch": 4.23, "eval_loss": 0.21008515357971191, "eval_runtime": 2.6054, "eval_samples_per_second": 881.646, "eval_steps_per_second": 13.818, "step": 379000 }, { "epoch": 4.23, "learning_rate": 0.0001123965269473426, "loss": 0.2217, "step": 379500 }, { "epoch": 4.24, "learning_rate": 0.00011229388140850814, "loss": 0.2215, "step": 380000 }, { "epoch": 4.24, "eval_loss": 0.21324031054973602, "eval_runtime": 2.6426, "eval_samples_per_second": 869.205, "eval_steps_per_second": 13.623, "step": 380000 }, { "epoch": 4.24, "learning_rate": 0.00011219114757943505, "loss": 0.2215, "step": 380500 }, { "epoch": 4.25, "learning_rate": 0.00011208832574099368, "loss": 0.2215, "step": 381000 }, { "epoch": 4.25, "eval_loss": 0.20935191214084625, "eval_runtime": 2.5516, "eval_samples_per_second": 900.235, "eval_steps_per_second": 14.109, "step": 381000 }, { "epoch": 4.25, "learning_rate": 0.00011198541617429504, "loss": 0.2213, "step": 381500 }, { "epoch": 4.26, "learning_rate": 0.00011188241916068993, "loss": 0.2211, "step": 382000 }, { "epoch": 4.26, "eval_loss": 0.21030767261981964, "eval_runtime": 2.5649, "eval_samples_per_second": 895.555, "eval_steps_per_second": 14.036, "step": 382000 }, { "epoch": 4.27, "learning_rate": 0.00011177933498176826, "loss": 0.221, "step": 382500 }, { "epoch": 4.27, "learning_rate": 0.00011167616391935826, "loss": 0.221, "step": 383000 }, { "epoch": 4.27, "eval_loss": 0.21177974343299866, "eval_runtime": 2.5071, "eval_samples_per_second": 916.184, "eval_steps_per_second": 14.359, "step": 383000 }, { "epoch": 4.28, "learning_rate": 0.00011157290625552563, "loss": 0.2206, "step": 383500 }, { "epoch": 4.28, "learning_rate": 0.00011146956227257293, "loss": 0.2209, "step": 384000 }, { "epoch": 4.28, "eval_loss": 0.2104860246181488, "eval_runtime": 2.5213, "eval_samples_per_second": 911.034, "eval_steps_per_second": 14.278, "step": 384000 }, { "epoch": 4.29, "learning_rate": 0.0001113661322530386, "loss": 0.2204, "step": 384500 }, { "epoch": 4.29, "learning_rate": 0.00011126261647969645, "loss": 0.221, "step": 385000 }, { "epoch": 4.29, "eval_loss": 0.20968575775623322, "eval_runtime": 2.69, "eval_samples_per_second": 853.888, "eval_steps_per_second": 13.383, "step": 385000 }, { "epoch": 4.3, "learning_rate": 0.00011115901523555457, "loss": 0.2206, "step": 385500 }, { "epoch": 4.31, "learning_rate": 0.00011105532880385487, "loss": 0.2207, "step": 386000 }, { "epoch": 4.31, "eval_loss": 0.21161584556102753, "eval_runtime": 2.6386, "eval_samples_per_second": 870.537, "eval_steps_per_second": 13.644, "step": 386000 }, { "epoch": 4.31, "learning_rate": 0.00011095155746807206, "loss": 0.2211, "step": 386500 }, { "epoch": 4.32, "learning_rate": 0.00011084770151191299, "loss": 0.2207, "step": 387000 }, { "epoch": 4.32, "eval_loss": 0.20934854447841644, "eval_runtime": 2.6571, "eval_samples_per_second": 864.463, "eval_steps_per_second": 13.548, "step": 387000 }, { "epoch": 4.32, "learning_rate": 0.00011074376121931591, "loss": 0.2204, "step": 387500 }, { "epoch": 4.33, "learning_rate": 0.00011063973687444962, "loss": 0.2205, "step": 388000 }, { "epoch": 4.33, "eval_loss": 0.20918506383895874, "eval_runtime": 2.6259, "eval_samples_per_second": 874.731, "eval_steps_per_second": 13.709, "step": 388000 }, { "epoch": 4.33, "learning_rate": 0.00011053562876171268, "loss": 0.2204, "step": 388500 }, { "epoch": 4.34, "learning_rate": 0.00011043143716573272, "loss": 0.2204, "step": 389000 }, { "epoch": 4.34, "eval_loss": 0.2104416787624359, "eval_runtime": 2.543, "eval_samples_per_second": 903.26, "eval_steps_per_second": 14.156, "step": 389000 }, { "epoch": 4.34, "learning_rate": 0.00011032716237136557, "loss": 0.2206, "step": 389500 }, { "epoch": 4.35, "learning_rate": 0.00011022280466369448, "loss": 0.2204, "step": 390000 }, { "epoch": 4.35, "eval_loss": 0.20674540102481842, "eval_runtime": 3.9957, "eval_samples_per_second": 574.865, "eval_steps_per_second": 9.01, "step": 390000 }, { "epoch": 4.36, "learning_rate": 0.00011011836432802956, "loss": 0.2203, "step": 390500 }, { "epoch": 4.36, "learning_rate": 0.00011001384164990662, "loss": 0.2204, "step": 391000 }, { "epoch": 4.36, "eval_loss": 0.20982348918914795, "eval_runtime": 2.6878, "eval_samples_per_second": 854.612, "eval_steps_per_second": 13.394, "step": 391000 }, { "epoch": 4.37, "learning_rate": 0.00010990923691508666, "loss": 0.2206, "step": 391500 }, { "epoch": 4.37, "learning_rate": 0.00010980455040955506, "loss": 0.2201, "step": 392000 }, { "epoch": 4.37, "eval_loss": 0.20970028638839722, "eval_runtime": 2.66, "eval_samples_per_second": 863.53, "eval_steps_per_second": 13.534, "step": 392000 }, { "epoch": 4.38, "learning_rate": 0.00010969978241952076, "loss": 0.2199, "step": 392500 }, { "epoch": 4.38, "learning_rate": 0.00010959493323141538, "loss": 0.2198, "step": 393000 }, { "epoch": 4.38, "eval_loss": 0.2100575715303421, "eval_runtime": 2.6711, "eval_samples_per_second": 859.949, "eval_steps_per_second": 13.478, "step": 393000 }, { "epoch": 4.39, "learning_rate": 0.00010949000313189264, "loss": 0.2201, "step": 393500 }, { "epoch": 4.39, "learning_rate": 0.00010938499240782739, "loss": 0.2197, "step": 394000 }, { "epoch": 4.39, "eval_loss": 0.21025221049785614, "eval_runtime": 2.6759, "eval_samples_per_second": 858.394, "eval_steps_per_second": 13.453, "step": 394000 }, { "epoch": 4.4, "learning_rate": 0.00010927990134631496, "loss": 0.2201, "step": 394500 }, { "epoch": 4.41, "learning_rate": 0.00010917473023467032, "loss": 0.2197, "step": 395000 }, { "epoch": 4.41, "eval_loss": 0.21049723029136658, "eval_runtime": 2.4803, "eval_samples_per_second": 926.101, "eval_steps_per_second": 14.514, "step": 395000 }, { "epoch": 4.41, "learning_rate": 0.00010906947936042724, "loss": 0.2192, "step": 395500 }, { "epoch": 4.42, "learning_rate": 0.00010896414901133761, "loss": 0.2196, "step": 396000 }, { "epoch": 4.42, "eval_loss": 0.2069980949163437, "eval_runtime": 2.5039, "eval_samples_per_second": 917.356, "eval_steps_per_second": 14.377, "step": 396000 }, { "epoch": 4.42, "learning_rate": 0.00010885873947537058, "loss": 0.2197, "step": 396500 }, { "epoch": 4.43, "learning_rate": 0.00010875325104071177, "loss": 0.22, "step": 397000 }, { "epoch": 4.43, "eval_loss": 0.21147653460502625, "eval_runtime": 2.5638, "eval_samples_per_second": 895.951, "eval_steps_per_second": 14.042, "step": 397000 }, { "epoch": 4.43, "learning_rate": 0.00010864768399576257, "loss": 0.2196, "step": 397500 }, { "epoch": 4.44, "learning_rate": 0.00010854203862913927, "loss": 0.2194, "step": 398000 }, { "epoch": 4.44, "eval_loss": 0.21261727809906006, "eval_runtime": 2.567, "eval_samples_per_second": 894.808, "eval_steps_per_second": 14.024, "step": 398000 }, { "epoch": 4.44, "learning_rate": 0.00010843631522967218, "loss": 0.2193, "step": 398500 }, { "epoch": 4.45, "learning_rate": 0.00010833051408640509, "loss": 0.2193, "step": 399000 }, { "epoch": 4.45, "eval_loss": 0.2084684520959854, "eval_runtime": 2.5323, "eval_samples_per_second": 907.094, "eval_steps_per_second": 14.217, "step": 399000 }, { "epoch": 4.46, "learning_rate": 0.0001082246354885943, "loss": 0.2187, "step": 399500 }, { "epoch": 4.46, "learning_rate": 0.00010811867972570786, "loss": 0.2195, "step": 400000 }, { "epoch": 4.46, "eval_loss": 0.20896346867084503, "eval_runtime": 2.5912, "eval_samples_per_second": 886.47, "eval_steps_per_second": 13.893, "step": 400000 }, { "epoch": 4.47, "learning_rate": 0.00010801264708742474, "loss": 0.219, "step": 400500 }, { "epoch": 4.47, "learning_rate": 0.00010790653786363416, "loss": 0.6229, "step": 401000 }, { "epoch": 4.47, "eval_loss": 0.6781117916107178, "eval_runtime": 2.5271, "eval_samples_per_second": 908.933, "eval_steps_per_second": 14.245, "step": 401000 }, { "epoch": 4.48, "learning_rate": 0.00010780035234443463, "loss": 0.6782, "step": 401500 }, { "epoch": 4.48, "learning_rate": 0.00010769409082013337, "loss": 0.6775, "step": 402000 }, { "epoch": 4.48, "eval_loss": 0.6771849989891052, "eval_runtime": 2.5891, "eval_samples_per_second": 887.183, "eval_steps_per_second": 13.904, "step": 402000 }, { "epoch": 4.49, "learning_rate": 0.00010758775358124532, "loss": 0.6774, "step": 402500 }, { "epoch": 4.49, "learning_rate": 0.00010748134091849238, "loss": 0.6773, "step": 403000 }, { "epoch": 4.49, "eval_loss": 0.6773160099983215, "eval_runtime": 2.6017, "eval_samples_per_second": 882.892, "eval_steps_per_second": 13.837, "step": 403000 }, { "epoch": 4.5, "learning_rate": 0.00010737485312280277, "loss": 0.6773, "step": 403500 }, { "epoch": 4.51, "learning_rate": 0.00010726829048531, "loss": 0.6773, "step": 404000 }, { "epoch": 4.51, "eval_loss": 0.6771775484085083, "eval_runtime": 2.5177, "eval_samples_per_second": 912.323, "eval_steps_per_second": 14.298, "step": 404000 }, { "epoch": 4.51, "learning_rate": 0.00010716165329735229, "loss": 0.6773, "step": 404500 }, { "epoch": 4.52, "learning_rate": 0.00010705494185047165, "loss": 0.6773, "step": 405000 }, { "epoch": 4.52, "eval_loss": 0.6771337389945984, "eval_runtime": 2.4415, "eval_samples_per_second": 940.818, "eval_steps_per_second": 14.745, "step": 405000 }, { "epoch": 4.52, "learning_rate": 0.00010694815643641308, "loss": 0.6773, "step": 405500 }, { "epoch": 4.53, "learning_rate": 0.0001068412973471238, "loss": 0.6774, "step": 406000 }, { "epoch": 4.53, "eval_loss": 0.6766112446784973, "eval_runtime": 2.5325, "eval_samples_per_second": 906.994, "eval_steps_per_second": 14.215, "step": 406000 }, { "epoch": 4.53, "learning_rate": 0.00010673436487475252, "loss": 0.6771, "step": 406500 }, { "epoch": 4.54, "learning_rate": 0.00010662735931164853, "loss": 0.6774, "step": 407000 }, { "epoch": 4.54, "eval_loss": 0.6774923801422119, "eval_runtime": 2.5078, "eval_samples_per_second": 915.958, "eval_steps_per_second": 14.355, "step": 407000 }, { "epoch": 4.54, "learning_rate": 0.00010652028095036092, "loss": 0.6773, "step": 407500 }, { "epoch": 4.55, "learning_rate": 0.0001064131300836379, "loss": 0.6774, "step": 408000 }, { "epoch": 4.55, "eval_loss": 0.6766784191131592, "eval_runtime": 2.6482, "eval_samples_per_second": 867.379, "eval_steps_per_second": 13.594, "step": 408000 }, { "epoch": 4.56, "learning_rate": 0.0001063059070044258, "loss": 0.6772, "step": 408500 }, { "epoch": 4.56, "learning_rate": 0.0001061986120058684, "loss": 0.6713, "step": 409000 }, { "epoch": 4.56, "eval_loss": 0.6385542154312134, "eval_runtime": 2.5784, "eval_samples_per_second": 890.853, "eval_steps_per_second": 13.962, "step": 409000 }, { "epoch": 4.57, "learning_rate": 0.00010609124538130623, "loss": 0.4505, "step": 409500 }, { "epoch": 4.57, "learning_rate": 0.00010598380742427543, "loss": 0.2349, "step": 410000 }, { "epoch": 4.57, "eval_loss": 0.22073650360107422, "eval_runtime": 2.623, "eval_samples_per_second": 875.727, "eval_steps_per_second": 13.725, "step": 410000 }, { "epoch": 4.58, "learning_rate": 0.00010587629842850737, "loss": 0.2291, "step": 410500 }, { "epoch": 4.58, "learning_rate": 0.00010576871868792746, "loss": 0.2266, "step": 411000 }, { "epoch": 4.58, "eval_loss": 0.21682733297348022, "eval_runtime": 2.6588, "eval_samples_per_second": 863.931, "eval_steps_per_second": 13.54, "step": 411000 }, { "epoch": 4.59, "learning_rate": 0.00010566106849665463, "loss": 0.2249, "step": 411500 }, { "epoch": 4.59, "learning_rate": 0.0001055533481490004, "loss": 0.2234, "step": 412000 }, { "epoch": 4.59, "eval_loss": 0.2134321630001068, "eval_runtime": 2.6016, "eval_samples_per_second": 882.919, "eval_steps_per_second": 13.838, "step": 412000 }, { "epoch": 4.6, "learning_rate": 0.00010544555793946805, "loss": 0.2232, "step": 412500 }, { "epoch": 4.61, "learning_rate": 0.000105337698162752, "loss": 0.222, "step": 413000 }, { "epoch": 4.61, "eval_loss": 0.21415403485298157, "eval_runtime": 2.6959, "eval_samples_per_second": 852.048, "eval_steps_per_second": 13.354, "step": 413000 }, { "epoch": 4.61, "learning_rate": 0.00010522976911373667, "loss": 0.2219, "step": 413500 }, { "epoch": 4.62, "learning_rate": 0.00010512177108749594, "loss": 0.2217, "step": 414000 }, { "epoch": 4.62, "eval_loss": 0.21083350479602814, "eval_runtime": 2.6458, "eval_samples_per_second": 868.155, "eval_steps_per_second": 13.606, "step": 414000 }, { "epoch": 4.62, "learning_rate": 0.00010501370437929234, "loss": 0.2211, "step": 414500 }, { "epoch": 4.63, "learning_rate": 0.00010490556928457616, "loss": 0.2215, "step": 415000 }, { "epoch": 4.63, "eval_loss": 0.21060839295387268, "eval_runtime": 2.64, "eval_samples_per_second": 870.073, "eval_steps_per_second": 13.636, "step": 415000 }, { "epoch": 4.63, "learning_rate": 0.00010479736609898454, "loss": 0.2206, "step": 415500 }, { "epoch": 4.64, "learning_rate": 0.00010468909511834088, "loss": 0.2209, "step": 416000 }, { "epoch": 4.64, "eval_loss": 0.20775271952152252, "eval_runtime": 2.6856, "eval_samples_per_second": 855.309, "eval_steps_per_second": 13.405, "step": 416000 }, { "epoch": 4.65, "learning_rate": 0.00010458075663865392, "loss": 0.2205, "step": 416500 }, { "epoch": 4.65, "learning_rate": 0.00010447235095611692, "loss": 0.2201, "step": 417000 }, { "epoch": 4.65, "eval_loss": 0.20872974395751953, "eval_runtime": 2.6782, "eval_samples_per_second": 857.663, "eval_steps_per_second": 13.442, "step": 417000 }, { "epoch": 4.66, "learning_rate": 0.0001043638783671069, "loss": 0.2206, "step": 417500 }, { "epoch": 4.66, "learning_rate": 0.00010425533916818376, "loss": 0.2199, "step": 418000 }, { "epoch": 4.66, "eval_loss": 0.20818735659122467, "eval_runtime": 2.6703, "eval_samples_per_second": 860.216, "eval_steps_per_second": 13.482, "step": 418000 }, { "epoch": 4.67, "learning_rate": 0.0001041467336560895, "loss": 0.22, "step": 418500 }, { "epoch": 4.67, "learning_rate": 0.00010403806212774747, "loss": 0.2197, "step": 419000 }, { "epoch": 4.67, "eval_loss": 0.20917142927646637, "eval_runtime": 2.6142, "eval_samples_per_second": 878.667, "eval_steps_per_second": 13.771, "step": 419000 }, { "epoch": 4.68, "learning_rate": 0.00010392932488026147, "loss": 0.2194, "step": 419500 }, { "epoch": 4.68, "learning_rate": 0.000103820522210915, "loss": 0.2196, "step": 420000 }, { "epoch": 4.68, "eval_loss": 0.20849910378456116, "eval_runtime": 2.6737, "eval_samples_per_second": 859.121, "eval_steps_per_second": 13.465, "step": 420000 }, { "epoch": 4.69, "learning_rate": 0.00010371165441717041, "loss": 0.2194, "step": 420500 }, { "epoch": 4.7, "learning_rate": 0.00010360272179666802, "loss": 0.2192, "step": 421000 }, { "epoch": 4.7, "eval_loss": 0.2099054604768753, "eval_runtime": 2.6463, "eval_samples_per_second": 868.003, "eval_steps_per_second": 13.604, "step": 421000 }, { "epoch": 4.7, "learning_rate": 0.00010349372464722555, "loss": 0.2192, "step": 421500 }, { "epoch": 4.71, "learning_rate": 0.00010338466326683697, "loss": 0.2192, "step": 422000 }, { "epoch": 4.71, "eval_loss": 0.20990021526813507, "eval_runtime": 2.699, "eval_samples_per_second": 851.063, "eval_steps_per_second": 13.338, "step": 422000 }, { "epoch": 4.71, "learning_rate": 0.00010327553795367197, "loss": 0.2191, "step": 422500 }, { "epoch": 4.72, "learning_rate": 0.00010316634900607497, "loss": 0.2187, "step": 423000 }, { "epoch": 4.72, "eval_loss": 0.21041807532310486, "eval_runtime": 2.6813, "eval_samples_per_second": 856.689, "eval_steps_per_second": 13.427, "step": 423000 }, { "epoch": 4.72, "learning_rate": 0.0001030570967225644, "loss": 0.219, "step": 423500 }, { "epoch": 4.73, "learning_rate": 0.00010294778140183182, "loss": 0.2188, "step": 424000 }, { "epoch": 4.73, "eval_loss": 0.20911771059036255, "eval_runtime": 2.7061, "eval_samples_per_second": 848.811, "eval_steps_per_second": 13.303, "step": 424000 }, { "epoch": 4.73, "learning_rate": 0.00010283840334274117, "loss": 0.2189, "step": 424500 }, { "epoch": 4.74, "learning_rate": 0.00010272896284432785, "loss": 0.2187, "step": 425000 }, { "epoch": 4.74, "eval_loss": 0.20740678906440735, "eval_runtime": 2.6921, "eval_samples_per_second": 853.226, "eval_steps_per_second": 13.372, "step": 425000 }, { "epoch": 4.75, "learning_rate": 0.00010261946020579799, "loss": 0.2188, "step": 425500 }, { "epoch": 4.75, "learning_rate": 0.00010250989572652766, "loss": 0.2182, "step": 426000 }, { "epoch": 4.75, "eval_loss": 0.20986562967300415, "eval_runtime": 2.6422, "eval_samples_per_second": 869.349, "eval_steps_per_second": 13.625, "step": 426000 }, { "epoch": 4.76, "learning_rate": 0.00010240026970606198, "loss": 0.2189, "step": 426500 }, { "epoch": 4.76, "learning_rate": 0.00010229058244411427, "loss": 0.2179, "step": 427000 }, { "epoch": 4.76, "eval_loss": 0.20823934674263, "eval_runtime": 2.6772, "eval_samples_per_second": 857.994, "eval_steps_per_second": 13.447, "step": 427000 }, { "epoch": 4.77, "learning_rate": 0.0001021808342405653, "loss": 0.2183, "step": 427500 }, { "epoch": 4.77, "learning_rate": 0.00010207102539546251, "loss": 0.2183, "step": 428000 }, { "epoch": 4.77, "eval_loss": 0.2062913179397583, "eval_runtime": 2.6582, "eval_samples_per_second": 864.108, "eval_steps_per_second": 13.543, "step": 428000 }, { "epoch": 4.78, "learning_rate": 0.00010196115620901904, "loss": 0.2184, "step": 428500 }, { "epoch": 4.78, "learning_rate": 0.00010185122698161311, "loss": 0.2178, "step": 429000 }, { "epoch": 4.78, "eval_loss": 0.2081955075263977, "eval_runtime": 2.6196, "eval_samples_per_second": 876.837, "eval_steps_per_second": 13.742, "step": 429000 }, { "epoch": 4.79, "learning_rate": 0.00010174123801378698, "loss": 0.2178, "step": 429500 }, { "epoch": 4.8, "learning_rate": 0.00010163118960624632, "loss": 0.2178, "step": 430000 }, { "epoch": 4.8, "eval_loss": 0.2074689269065857, "eval_runtime": 2.661, "eval_samples_per_second": 863.204, "eval_steps_per_second": 13.529, "step": 430000 }, { "epoch": 4.8, "learning_rate": 0.00010152108205985925, "loss": 0.2179, "step": 430500 }, { "epoch": 4.81, "learning_rate": 0.00010141091567565561, "loss": 0.2176, "step": 431000 }, { "epoch": 4.81, "eval_loss": 0.20468705892562866, "eval_runtime": 2.535, "eval_samples_per_second": 906.124, "eval_steps_per_second": 14.201, "step": 431000 }, { "epoch": 4.81, "learning_rate": 0.00010130069075482611, "loss": 0.2174, "step": 431500 }, { "epoch": 4.82, "learning_rate": 0.00010119040759872142, "loss": 0.2178, "step": 432000 }, { "epoch": 4.82, "eval_loss": 0.20778746902942657, "eval_runtime": 2.5027, "eval_samples_per_second": 917.795, "eval_steps_per_second": 14.384, "step": 432000 }, { "epoch": 4.82, "learning_rate": 0.00010108006650885151, "loss": 0.2171, "step": 432500 }, { "epoch": 4.83, "learning_rate": 0.00010096966778688472, "loss": 0.2175, "step": 433000 }, { "epoch": 4.83, "eval_loss": 0.2072180211544037, "eval_runtime": 2.6325, "eval_samples_per_second": 872.544, "eval_steps_per_second": 13.675, "step": 433000 }, { "epoch": 4.83, "learning_rate": 0.00010085921173464691, "loss": 0.2175, "step": 433500 }, { "epoch": 4.84, "learning_rate": 0.00010074869865412074, "loss": 0.217, "step": 434000 }, { "epoch": 4.84, "eval_loss": 0.20796707272529602, "eval_runtime": 2.5383, "eval_samples_per_second": 904.951, "eval_steps_per_second": 14.183, "step": 434000 }, { "epoch": 4.85, "learning_rate": 0.00010063812884744475, "loss": 0.217, "step": 434500 }, { "epoch": 4.85, "learning_rate": 0.00010052750261691254, "loss": 0.2172, "step": 435000 }, { "epoch": 4.85, "eval_loss": 0.2071193903684616, "eval_runtime": 2.5047, "eval_samples_per_second": 917.082, "eval_steps_per_second": 14.373, "step": 435000 }, { "epoch": 4.86, "learning_rate": 0.00010041682026497199, "loss": 0.2165, "step": 435500 }, { "epoch": 4.86, "learning_rate": 0.0001003060820942245, "loss": 0.2168, "step": 436000 }, { "epoch": 4.86, "eval_loss": 0.205715611577034, "eval_runtime": 2.6955, "eval_samples_per_second": 852.165, "eval_steps_per_second": 13.356, "step": 436000 }, { "epoch": 4.87, "learning_rate": 0.00010019528840742392, "loss": 0.2172, "step": 436500 }, { "epoch": 4.87, "learning_rate": 0.00010008443950747599, "loss": 0.2166, "step": 437000 }, { "epoch": 4.87, "eval_loss": 0.20823287963867188, "eval_runtime": 2.581, "eval_samples_per_second": 889.967, "eval_steps_per_second": 13.948, "step": 437000 }, { "epoch": 4.88, "learning_rate": 9.997353569743736e-05, "loss": 0.2168, "step": 437500 }, { "epoch": 4.88, "learning_rate": 9.986257728051483e-05, "loss": 0.2173, "step": 438000 }, { "epoch": 4.88, "eval_loss": 0.2068750560283661, "eval_runtime": 2.5735, "eval_samples_per_second": 892.546, "eval_steps_per_second": 13.989, "step": 438000 }, { "epoch": 4.89, "learning_rate": 9.975156456006448e-05, "loss": 0.2164, "step": 438500 }, { "epoch": 4.9, "learning_rate": 9.964049783959082e-05, "loss": 0.2168, "step": 439000 }, { "epoch": 4.9, "eval_loss": 0.2076042890548706, "eval_runtime": 2.5663, "eval_samples_per_second": 895.048, "eval_steps_per_second": 14.028, "step": 439000 }, { "epoch": 4.9, "learning_rate": 9.952937742274605e-05, "loss": 0.2166, "step": 439500 }, { "epoch": 4.91, "learning_rate": 9.94182036133291e-05, "loss": 0.2167, "step": 440000 }, { "epoch": 4.91, "eval_loss": 0.20752529799938202, "eval_runtime": 2.6206, "eval_samples_per_second": 876.507, "eval_steps_per_second": 13.737, "step": 440000 }, { "epoch": 4.91, "learning_rate": 9.930697671528499e-05, "loss": 0.2165, "step": 440500 }, { "epoch": 4.92, "learning_rate": 9.919569703270376e-05, "loss": 0.2166, "step": 441000 }, { "epoch": 4.92, "eval_loss": 0.20720918476581573, "eval_runtime": 2.6091, "eval_samples_per_second": 880.39, "eval_steps_per_second": 13.798, "step": 441000 }, { "epoch": 4.92, "learning_rate": 9.908436486981984e-05, "loss": 0.2164, "step": 441500 }, { "epoch": 4.93, "learning_rate": 9.89729805310111e-05, "loss": 0.217, "step": 442000 }, { "epoch": 4.93, "eval_loss": 0.20782898366451263, "eval_runtime": 2.5759, "eval_samples_per_second": 891.724, "eval_steps_per_second": 13.976, "step": 442000 }, { "epoch": 4.94, "learning_rate": 9.886154432079803e-05, "loss": 0.2166, "step": 442500 }, { "epoch": 4.94, "learning_rate": 9.875005654384307e-05, "loss": 0.2163, "step": 443000 }, { "epoch": 4.94, "eval_loss": 0.20723912119865417, "eval_runtime": 2.6842, "eval_samples_per_second": 855.749, "eval_steps_per_second": 13.412, "step": 443000 }, { "epoch": 4.95, "learning_rate": 9.863851750494944e-05, "loss": 0.2168, "step": 443500 }, { "epoch": 4.95, "learning_rate": 9.852692750906071e-05, "loss": 0.2165, "step": 444000 }, { "epoch": 4.95, "eval_loss": 0.20486100018024445, "eval_runtime": 2.6649, "eval_samples_per_second": 861.956, "eval_steps_per_second": 13.509, "step": 444000 }, { "epoch": 4.96, "learning_rate": 9.841528686125961e-05, "loss": 0.2158, "step": 444500 }, { "epoch": 4.96, "learning_rate": 9.830359586676737e-05, "loss": 0.2157, "step": 445000 }, { "epoch": 4.96, "eval_loss": 0.20740051567554474, "eval_runtime": 2.6655, "eval_samples_per_second": 861.742, "eval_steps_per_second": 13.506, "step": 445000 }, { "epoch": 4.97, "learning_rate": 9.819185483094299e-05, "loss": 0.2154, "step": 445500 }, { "epoch": 4.97, "learning_rate": 9.808006405928215e-05, "loss": 0.216, "step": 446000 }, { "epoch": 4.97, "eval_loss": 0.20571541786193848, "eval_runtime": 2.5424, "eval_samples_per_second": 903.461, "eval_steps_per_second": 14.16, "step": 446000 }, { "epoch": 4.98, "learning_rate": 9.796822385741657e-05, "loss": 0.216, "step": 446500 }, { "epoch": 4.99, "learning_rate": 9.785633453111306e-05, "loss": 0.2159, "step": 447000 }, { "epoch": 4.99, "eval_loss": 0.20638670027256012, "eval_runtime": 2.5508, "eval_samples_per_second": 900.496, "eval_steps_per_second": 14.113, "step": 447000 }, { "epoch": 4.99, "learning_rate": 9.774439638627277e-05, "loss": 0.2156, "step": 447500 }, { "epoch": 5.0, "learning_rate": 9.763240972893037e-05, "loss": 0.216, "step": 448000 }, { "epoch": 5.0, "eval_loss": 0.20472365617752075, "eval_runtime": 2.6318, "eval_samples_per_second": 872.783, "eval_steps_per_second": 13.679, "step": 448000 }, { "epoch": 5.0, "learning_rate": 9.752037486525302e-05, "loss": 0.2157, "step": 448500 }, { "epoch": 5.01, "learning_rate": 9.740829210153984e-05, "loss": 0.2153, "step": 449000 }, { "epoch": 5.01, "eval_loss": 0.2034875452518463, "eval_runtime": 2.6776, "eval_samples_per_second": 857.846, "eval_steps_per_second": 13.445, "step": 449000 }, { "epoch": 5.01, "learning_rate": 9.729616174422077e-05, "loss": 0.2153, "step": 449500 }, { "epoch": 5.02, "learning_rate": 9.718398409985593e-05, "loss": 0.2157, "step": 450000 }, { "epoch": 5.02, "eval_loss": 0.20429137349128723, "eval_runtime": 2.6747, "eval_samples_per_second": 858.786, "eval_steps_per_second": 13.459, "step": 450000 }, { "epoch": 5.02, "learning_rate": 9.707175947513475e-05, "loss": 0.215, "step": 450500 }, { "epoch": 5.03, "learning_rate": 9.695948817687504e-05, "loss": 0.2153, "step": 451000 }, { "epoch": 5.03, "eval_loss": 0.20547254383563995, "eval_runtime": 2.5571, "eval_samples_per_second": 898.273, "eval_steps_per_second": 14.078, "step": 451000 }, { "epoch": 5.04, "learning_rate": 9.684717051202227e-05, "loss": 0.2154, "step": 451500 }, { "epoch": 5.04, "learning_rate": 9.673480678764858e-05, "loss": 0.2151, "step": 452000 }, { "epoch": 5.04, "eval_loss": 0.204800084233284, "eval_runtime": 2.6202, "eval_samples_per_second": 876.642, "eval_steps_per_second": 13.739, "step": 452000 }, { "epoch": 5.05, "learning_rate": 9.662239731095222e-05, "loss": 0.2153, "step": 452500 }, { "epoch": 5.05, "learning_rate": 9.650994238925626e-05, "loss": 0.2149, "step": 453000 }, { "epoch": 5.05, "eval_loss": 0.20622511208057404, "eval_runtime": 2.6307, "eval_samples_per_second": 873.167, "eval_steps_per_second": 13.685, "step": 453000 }, { "epoch": 5.06, "learning_rate": 9.63974423300083e-05, "loss": 0.2148, "step": 453500 }, { "epoch": 5.06, "learning_rate": 9.628489744077911e-05, "loss": 0.2151, "step": 454000 }, { "epoch": 5.06, "eval_loss": 0.20128118991851807, "eval_runtime": 2.6992, "eval_samples_per_second": 851.002, "eval_steps_per_second": 13.337, "step": 454000 }, { "epoch": 5.07, "learning_rate": 9.617230802926214e-05, "loss": 0.2149, "step": 454500 }, { "epoch": 5.07, "learning_rate": 9.60596744032726e-05, "loss": 0.2151, "step": 455000 }, { "epoch": 5.07, "eval_loss": 0.2051314264535904, "eval_runtime": 2.5516, "eval_samples_per_second": 900.232, "eval_steps_per_second": 14.109, "step": 455000 }, { "epoch": 5.08, "learning_rate": 9.594699687074648e-05, "loss": 0.2147, "step": 455500 }, { "epoch": 5.09, "learning_rate": 9.583427573973982e-05, "loss": 0.2151, "step": 456000 }, { "epoch": 5.09, "eval_loss": 0.2050078958272934, "eval_runtime": 2.6417, "eval_samples_per_second": 869.501, "eval_steps_per_second": 13.627, "step": 456000 }, { "epoch": 5.09, "learning_rate": 9.57215113184279e-05, "loss": 0.2144, "step": 456500 }, { "epoch": 5.1, "learning_rate": 9.560870391510441e-05, "loss": 0.2149, "step": 457000 }, { "epoch": 5.1, "eval_loss": 0.20550428330898285, "eval_runtime": 2.6145, "eval_samples_per_second": 878.571, "eval_steps_per_second": 13.77, "step": 457000 }, { "epoch": 5.1, "learning_rate": 9.549585383818041e-05, "loss": 0.2152, "step": 457500 }, { "epoch": 5.11, "learning_rate": 9.538296139618371e-05, "loss": 0.2147, "step": 458000 }, { "epoch": 5.11, "eval_loss": 0.20618882775306702, "eval_runtime": 2.5432, "eval_samples_per_second": 903.188, "eval_steps_per_second": 14.155, "step": 458000 }, { "epoch": 5.11, "learning_rate": 9.527002689775799e-05, "loss": 0.2147, "step": 458500 }, { "epoch": 5.12, "learning_rate": 9.515705065166178e-05, "loss": 0.2143, "step": 459000 }, { "epoch": 5.12, "eval_loss": 0.20430730283260345, "eval_runtime": 2.5628, "eval_samples_per_second": 896.27, "eval_steps_per_second": 14.047, "step": 459000 }, { "epoch": 5.12, "learning_rate": 9.504403296676786e-05, "loss": 0.2148, "step": 459500 }, { "epoch": 5.13, "learning_rate": 9.493097415206228e-05, "loss": 0.2142, "step": 460000 }, { "epoch": 5.13, "eval_loss": 0.20442381501197815, "eval_runtime": 2.5555, "eval_samples_per_second": 898.857, "eval_steps_per_second": 14.087, "step": 460000 }, { "epoch": 5.14, "learning_rate": 9.481787451664349e-05, "loss": 0.2139, "step": 460500 }, { "epoch": 5.14, "learning_rate": 9.47047343697216e-05, "loss": 0.2141, "step": 461000 }, { "epoch": 5.14, "eval_loss": 0.2020626962184906, "eval_runtime": 2.9504, "eval_samples_per_second": 778.533, "eval_steps_per_second": 12.202, "step": 461000 }, { "epoch": 5.15, "learning_rate": 9.459155402061744e-05, "loss": 0.2143, "step": 461500 }, { "epoch": 5.15, "learning_rate": 9.447833377876176e-05, "loss": 0.2146, "step": 462000 }, { "epoch": 5.15, "eval_loss": 0.20509694516658783, "eval_runtime": 2.6371, "eval_samples_per_second": 871.021, "eval_steps_per_second": 13.651, "step": 462000 }, { "epoch": 5.16, "learning_rate": 9.436507395369439e-05, "loss": 0.2146, "step": 462500 }, { "epoch": 5.16, "learning_rate": 9.425177485506336e-05, "loss": 0.2143, "step": 463000 }, { "epoch": 5.16, "eval_loss": 0.20591478049755096, "eval_runtime": 2.5305, "eval_samples_per_second": 907.719, "eval_steps_per_second": 14.226, "step": 463000 }, { "epoch": 5.17, "learning_rate": 9.413843679262408e-05, "loss": 0.2143, "step": 463500 }, { "epoch": 5.17, "learning_rate": 9.402506007623848e-05, "loss": 0.2152, "step": 464000 }, { "epoch": 5.17, "eval_loss": 0.20412901043891907, "eval_runtime": 2.4892, "eval_samples_per_second": 922.805, "eval_steps_per_second": 14.463, "step": 464000 }, { "epoch": 5.18, "learning_rate": 9.391164501587417e-05, "loss": 0.2141, "step": 464500 }, { "epoch": 5.19, "learning_rate": 9.379819192160362e-05, "loss": 0.214, "step": 465000 }, { "epoch": 5.19, "eval_loss": 0.2041676938533783, "eval_runtime": 6.2441, "eval_samples_per_second": 367.868, "eval_steps_per_second": 5.765, "step": 465000 }, { "epoch": 5.19, "learning_rate": 9.368470110360323e-05, "loss": 0.2141, "step": 465500 }, { "epoch": 5.2, "learning_rate": 9.357117287215258e-05, "loss": 0.2138, "step": 466000 }, { "epoch": 5.2, "eval_loss": 0.20280323922634125, "eval_runtime": 2.5119, "eval_samples_per_second": 914.43, "eval_steps_per_second": 14.332, "step": 466000 }, { "epoch": 5.2, "learning_rate": 9.345760753763347e-05, "loss": 0.2139, "step": 466500 }, { "epoch": 5.21, "learning_rate": 9.334400541052928e-05, "loss": 0.2133, "step": 467000 }, { "epoch": 5.21, "eval_loss": 0.20342190563678741, "eval_runtime": 2.466, "eval_samples_per_second": 931.458, "eval_steps_per_second": 14.598, "step": 467000 }, { "epoch": 5.21, "learning_rate": 9.323036680142382e-05, "loss": 0.2138, "step": 467500 }, { "epoch": 5.22, "learning_rate": 9.311669202100073e-05, "loss": 0.2136, "step": 468000 }, { "epoch": 5.22, "eval_loss": 0.20149648189544678, "eval_runtime": 2.5403, "eval_samples_per_second": 904.23, "eval_steps_per_second": 14.172, "step": 468000 }, { "epoch": 5.23, "learning_rate": 9.300298138004249e-05, "loss": 0.2137, "step": 468500 }, { "epoch": 5.23, "learning_rate": 9.288923518942968e-05, "loss": 0.2135, "step": 469000 }, { "epoch": 5.23, "eval_loss": 0.2041381597518921, "eval_runtime": 3.0567, "eval_samples_per_second": 751.455, "eval_steps_per_second": 11.777, "step": 469000 }, { "epoch": 5.24, "learning_rate": 9.277545376014005e-05, "loss": 0.2134, "step": 469500 }, { "epoch": 5.24, "learning_rate": 9.26616374032477e-05, "loss": 0.2135, "step": 470000 }, { "epoch": 5.24, "eval_loss": 0.2072334885597229, "eval_runtime": 2.5117, "eval_samples_per_second": 914.527, "eval_steps_per_second": 14.333, "step": 470000 }, { "epoch": 5.25, "learning_rate": 9.254778642992213e-05, "loss": 0.2133, "step": 470500 }, { "epoch": 5.25, "learning_rate": 9.243390115142761e-05, "loss": 0.2132, "step": 471000 }, { "epoch": 5.25, "eval_loss": 0.20281285047531128, "eval_runtime": 2.5255, "eval_samples_per_second": 909.541, "eval_steps_per_second": 14.255, "step": 471000 }, { "epoch": 5.26, "learning_rate": 9.231998187912211e-05, "loss": 0.2135, "step": 471500 }, { "epoch": 5.26, "learning_rate": 9.220602892445661e-05, "loss": 0.2131, "step": 472000 }, { "epoch": 5.26, "eval_loss": 0.20354430377483368, "eval_runtime": 2.6683, "eval_samples_per_second": 860.841, "eval_steps_per_second": 13.492, "step": 472000 }, { "epoch": 5.27, "learning_rate": 9.209204259897412e-05, "loss": 0.2135, "step": 472500 }, { "epoch": 5.28, "learning_rate": 9.197802321430889e-05, "loss": 0.2131, "step": 473000 }, { "epoch": 5.28, "eval_loss": 0.20278283953666687, "eval_runtime": 2.6368, "eval_samples_per_second": 871.12, "eval_steps_per_second": 13.653, "step": 473000 }, { "epoch": 5.28, "learning_rate": 9.186397108218558e-05, "loss": 0.2131, "step": 473500 }, { "epoch": 5.29, "learning_rate": 9.174988651441833e-05, "loss": 0.2133, "step": 474000 }, { "epoch": 5.29, "eval_loss": 0.20307588577270508, "eval_runtime": 2.5584, "eval_samples_per_second": 897.827, "eval_steps_per_second": 14.071, "step": 474000 }, { "epoch": 5.29, "learning_rate": 9.163576982291006e-05, "loss": 0.2128, "step": 474500 }, { "epoch": 5.3, "learning_rate": 9.152162131965137e-05, "loss": 0.213, "step": 475000 }, { "epoch": 5.3, "eval_loss": 0.2002372145652771, "eval_runtime": 2.521, "eval_samples_per_second": 911.138, "eval_steps_per_second": 14.28, "step": 475000 }, { "epoch": 5.3, "learning_rate": 9.140744131671994e-05, "loss": 0.2133, "step": 475500 }, { "epoch": 5.31, "learning_rate": 9.129323012627956e-05, "loss": 0.2129, "step": 476000 }, { "epoch": 5.31, "eval_loss": 0.2028408944606781, "eval_runtime": 2.5037, "eval_samples_per_second": 917.448, "eval_steps_per_second": 14.379, "step": 476000 }, { "epoch": 5.31, "learning_rate": 9.117898806057925e-05, "loss": 0.2129, "step": 476500 }, { "epoch": 5.32, "learning_rate": 9.106471543195244e-05, "loss": 0.2131, "step": 477000 }, { "epoch": 5.32, "eval_loss": 0.20160850882530212, "eval_runtime": 2.4426, "eval_samples_per_second": 940.394, "eval_steps_per_second": 14.738, "step": 477000 }, { "epoch": 5.33, "learning_rate": 9.095041255281616e-05, "loss": 0.2124, "step": 477500 }, { "epoch": 5.33, "learning_rate": 9.08360797356701e-05, "loss": 0.2129, "step": 478000 }, { "epoch": 5.33, "eval_loss": 0.2036493867635727, "eval_runtime": 2.4816, "eval_samples_per_second": 925.609, "eval_steps_per_second": 14.507, "step": 478000 }, { "epoch": 5.34, "learning_rate": 9.07217172930958e-05, "loss": 0.2126, "step": 478500 }, { "epoch": 5.34, "learning_rate": 9.060732553775582e-05, "loss": 0.2127, "step": 479000 }, { "epoch": 5.34, "eval_loss": 0.19979223608970642, "eval_runtime": 2.5343, "eval_samples_per_second": 906.38, "eval_steps_per_second": 14.205, "step": 479000 }, { "epoch": 5.35, "learning_rate": 9.049290478239287e-05, "loss": 0.2126, "step": 479500 }, { "epoch": 5.35, "learning_rate": 9.037845533982892e-05, "loss": 0.2129, "step": 480000 }, { "epoch": 5.35, "eval_loss": 0.20060274004936218, "eval_runtime": 2.5428, "eval_samples_per_second": 903.348, "eval_steps_per_second": 14.158, "step": 480000 }, { "epoch": 5.36, "learning_rate": 9.02639775229644e-05, "loss": 0.2124, "step": 480500 }, { "epoch": 5.36, "learning_rate": 9.014947164477721e-05, "loss": 0.2126, "step": 481000 }, { "epoch": 5.36, "eval_loss": 0.20033051073551178, "eval_runtime": 2.4935, "eval_samples_per_second": 921.209, "eval_steps_per_second": 14.438, "step": 481000 }, { "epoch": 5.37, "learning_rate": 9.003493801832213e-05, "loss": 0.212, "step": 481500 }, { "epoch": 5.38, "learning_rate": 8.992037695672967e-05, "loss": 0.2127, "step": 482000 }, { "epoch": 5.38, "eval_loss": 0.2025318741798401, "eval_runtime": 2.5419, "eval_samples_per_second": 903.65, "eval_steps_per_second": 14.163, "step": 482000 }, { "epoch": 5.38, "learning_rate": 8.980578877320544e-05, "loss": 0.2128, "step": 482500 }, { "epoch": 5.39, "learning_rate": 8.969117378102912e-05, "loss": 0.2122, "step": 483000 }, { "epoch": 5.39, "eval_loss": 0.2024153620004654, "eval_runtime": 2.5406, "eval_samples_per_second": 904.131, "eval_steps_per_second": 14.17, "step": 483000 }, { "epoch": 5.39, "learning_rate": 8.957653229355374e-05, "loss": 0.212, "step": 483500 }, { "epoch": 5.4, "learning_rate": 8.946186462420478e-05, "loss": 0.2123, "step": 484000 }, { "epoch": 5.4, "eval_loss": 0.20227807760238647, "eval_runtime": 2.6148, "eval_samples_per_second": 878.471, "eval_steps_per_second": 13.768, "step": 484000 }, { "epoch": 5.4, "learning_rate": 8.934717108647922e-05, "loss": 0.2122, "step": 484500 }, { "epoch": 5.41, "learning_rate": 8.923245199394482e-05, "loss": 0.2123, "step": 485000 }, { "epoch": 5.41, "eval_loss": 0.2004445344209671, "eval_runtime": 2.6169, "eval_samples_per_second": 877.747, "eval_steps_per_second": 13.757, "step": 485000 }, { "epoch": 5.41, "learning_rate": 8.911770766023921e-05, "loss": 0.212, "step": 485500 }, { "epoch": 5.42, "learning_rate": 8.900293839906903e-05, "loss": 0.2113, "step": 486000 }, { "epoch": 5.42, "eval_loss": 0.20107805728912354, "eval_runtime": 2.6497, "eval_samples_per_second": 866.906, "eval_steps_per_second": 13.587, "step": 486000 }, { "epoch": 5.43, "learning_rate": 8.888814452420903e-05, "loss": 0.2238, "step": 486500 }, { "epoch": 5.43, "learning_rate": 8.87733263495013e-05, "loss": 0.2125, "step": 487000 }, { "epoch": 5.43, "eval_loss": 0.20239748060703278, "eval_runtime": 2.6391, "eval_samples_per_second": 870.377, "eval_steps_per_second": 13.641, "step": 487000 }, { "epoch": 5.44, "learning_rate": 8.865848418885434e-05, "loss": 0.2116, "step": 487500 }, { "epoch": 5.44, "learning_rate": 8.85436183562422e-05, "loss": 0.2117, "step": 488000 }, { "epoch": 5.44, "eval_loss": 0.20209769904613495, "eval_runtime": 2.6167, "eval_samples_per_second": 877.814, "eval_steps_per_second": 13.758, "step": 488000 }, { "epoch": 5.45, "learning_rate": 8.842872916570374e-05, "loss": 0.2117, "step": 488500 }, { "epoch": 5.45, "learning_rate": 8.83138169313416e-05, "loss": 0.2117, "step": 489000 }, { "epoch": 5.45, "eval_loss": 0.20107334852218628, "eval_runtime": 2.6263, "eval_samples_per_second": 874.613, "eval_steps_per_second": 13.707, "step": 489000 }, { "epoch": 5.46, "learning_rate": 8.819888196732144e-05, "loss": 0.2116, "step": 489500 }, { "epoch": 5.46, "learning_rate": 8.808392458787103e-05, "loss": 0.2116, "step": 490000 }, { "epoch": 5.46, "eval_loss": 0.2020297795534134, "eval_runtime": 2.6641, "eval_samples_per_second": 862.22, "eval_steps_per_second": 13.513, "step": 490000 }, { "epoch": 5.47, "learning_rate": 8.796894510727945e-05, "loss": 0.2116, "step": 490500 }, { "epoch": 5.48, "learning_rate": 8.78539438398963e-05, "loss": 0.2112, "step": 491000 }, { "epoch": 5.48, "eval_loss": 0.20125530660152435, "eval_runtime": 2.6454, "eval_samples_per_second": 868.302, "eval_steps_per_second": 13.609, "step": 491000 }, { "epoch": 5.48, "learning_rate": 8.773892110013058e-05, "loss": 0.2116, "step": 491500 }, { "epoch": 5.49, "learning_rate": 8.762387720245008e-05, "loss": 0.2114, "step": 492000 }, { "epoch": 5.49, "eval_loss": 0.2021346092224121, "eval_runtime": 2.6349, "eval_samples_per_second": 871.772, "eval_steps_per_second": 13.663, "step": 492000 }, { "epoch": 5.49, "learning_rate": 8.750881246138043e-05, "loss": 0.2115, "step": 492500 }, { "epoch": 5.5, "learning_rate": 8.73937271915042e-05, "loss": 0.2115, "step": 493000 }, { "epoch": 5.5, "eval_loss": 0.20063868165016174, "eval_runtime": 2.6134, "eval_samples_per_second": 878.933, "eval_steps_per_second": 13.775, "step": 493000 }, { "epoch": 5.5, "learning_rate": 8.727862170746019e-05, "loss": 0.2112, "step": 493500 }, { "epoch": 5.51, "learning_rate": 8.716349632394235e-05, "loss": 0.2113, "step": 494000 }, { "epoch": 5.51, "eval_loss": 0.2013174444437027, "eval_runtime": 2.5993, "eval_samples_per_second": 883.689, "eval_steps_per_second": 13.85, "step": 494000 }, { "epoch": 5.52, "learning_rate": 8.70483513556991e-05, "loss": 0.2111, "step": 494500 }, { "epoch": 5.52, "learning_rate": 8.69331871175324e-05, "loss": 0.2111, "step": 495000 }, { "epoch": 5.52, "eval_loss": 0.20253530144691467, "eval_runtime": 2.655, "eval_samples_per_second": 865.174, "eval_steps_per_second": 13.56, "step": 495000 }, { "epoch": 5.53, "learning_rate": 8.681800392429684e-05, "loss": 0.2111, "step": 495500 }, { "epoch": 5.53, "learning_rate": 8.67028020908989e-05, "loss": 0.2112, "step": 496000 }, { "epoch": 5.53, "eval_loss": 0.2021927386522293, "eval_runtime": 2.5884, "eval_samples_per_second": 887.415, "eval_steps_per_second": 13.908, "step": 496000 }, { "epoch": 5.54, "learning_rate": 8.658758193229601e-05, "loss": 0.211, "step": 496500 }, { "epoch": 5.54, "learning_rate": 8.647234376349565e-05, "loss": 0.2107, "step": 497000 }, { "epoch": 5.54, "eval_loss": 0.19982150197029114, "eval_runtime": 2.5228, "eval_samples_per_second": 910.512, "eval_steps_per_second": 14.27, "step": 497000 }, { "epoch": 5.55, "learning_rate": 8.635708789955458e-05, "loss": 0.2108, "step": 497500 }, { "epoch": 5.55, "learning_rate": 8.624181465557794e-05, "loss": 0.2109, "step": 498000 }, { "epoch": 5.55, "eval_loss": 0.2010236382484436, "eval_runtime": 2.4638, "eval_samples_per_second": 932.313, "eval_steps_per_second": 14.612, "step": 498000 }, { "epoch": 5.56, "learning_rate": 8.612652434671837e-05, "loss": 0.2108, "step": 498500 }, { "epoch": 5.57, "learning_rate": 8.601121728817519e-05, "loss": 0.211, "step": 499000 }, { "epoch": 5.57, "eval_loss": 0.19817927479743958, "eval_runtime": 2.5254, "eval_samples_per_second": 909.547, "eval_steps_per_second": 14.255, "step": 499000 }, { "epoch": 5.57, "learning_rate": 8.589589379519346e-05, "loss": 0.2109, "step": 499500 }, { "epoch": 5.58, "learning_rate": 8.578055418306327e-05, "loss": 0.2107, "step": 500000 }, { "epoch": 5.58, "eval_loss": 0.199160635471344, "eval_runtime": 2.6076, "eval_samples_per_second": 880.873, "eval_steps_per_second": 13.806, "step": 500000 }, { "epoch": 5.58, "learning_rate": 8.566519876711864e-05, "loss": 0.2109, "step": 500500 }, { "epoch": 5.59, "learning_rate": 8.55498278627369e-05, "loss": 0.2107, "step": 501000 }, { "epoch": 5.59, "eval_loss": 0.1986326277256012, "eval_runtime": 2.5229, "eval_samples_per_second": 910.451, "eval_steps_per_second": 14.269, "step": 501000 }, { "epoch": 5.59, "learning_rate": 8.543444178533773e-05, "loss": 0.2101, "step": 501500 }, { "epoch": 5.6, "learning_rate": 8.531904085038221e-05, "loss": 0.2104, "step": 502000 }, { "epoch": 5.6, "eval_loss": 0.1985243856906891, "eval_runtime": 2.533, "eval_samples_per_second": 906.836, "eval_steps_per_second": 14.212, "step": 502000 }, { "epoch": 5.6, "learning_rate": 8.520362537337214e-05, "loss": 0.2104, "step": 502500 }, { "epoch": 5.61, "learning_rate": 8.508819566984897e-05, "loss": 0.2105, "step": 503000 }, { "epoch": 5.61, "eval_loss": 0.19903981685638428, "eval_runtime": 2.5776, "eval_samples_per_second": 891.131, "eval_steps_per_second": 13.966, "step": 503000 }, { "epoch": 5.62, "learning_rate": 8.497275205539314e-05, "loss": 0.2103, "step": 503500 }, { "epoch": 5.62, "learning_rate": 8.485729484562307e-05, "loss": 0.2104, "step": 504000 }, { "epoch": 5.62, "eval_loss": 0.20253504812717438, "eval_runtime": 2.542, "eval_samples_per_second": 903.632, "eval_steps_per_second": 14.162, "step": 504000 }, { "epoch": 5.63, "learning_rate": 8.474182435619437e-05, "loss": 0.21, "step": 504500 }, { "epoch": 5.63, "learning_rate": 8.462634090279895e-05, "loss": 0.2099, "step": 505000 }, { "epoch": 5.63, "eval_loss": 0.20055775344371796, "eval_runtime": 2.5318, "eval_samples_per_second": 907.265, "eval_steps_per_second": 14.219, "step": 505000 }, { "epoch": 5.64, "learning_rate": 8.451084480116415e-05, "loss": 0.2099, "step": 505500 }, { "epoch": 5.64, "learning_rate": 8.439533636705194e-05, "loss": 0.2102, "step": 506000 }, { "epoch": 5.64, "eval_loss": 0.1988956183195114, "eval_runtime": 2.5177, "eval_samples_per_second": 912.354, "eval_steps_per_second": 14.299, "step": 506000 }, { "epoch": 5.65, "learning_rate": 8.427981591625791e-05, "loss": 0.2097, "step": 506500 }, { "epoch": 5.65, "learning_rate": 8.416428376461061e-05, "loss": 0.2101, "step": 507000 }, { "epoch": 5.65, "eval_loss": 0.20103515684604645, "eval_runtime": 2.4839, "eval_samples_per_second": 924.771, "eval_steps_per_second": 14.494, "step": 507000 }, { "epoch": 5.66, "learning_rate": 8.404874022797049e-05, "loss": 0.21, "step": 507500 }, { "epoch": 5.67, "learning_rate": 8.393318562222916e-05, "loss": 0.2102, "step": 508000 }, { "epoch": 5.67, "eval_loss": 0.19926241040229797, "eval_runtime": 2.6513, "eval_samples_per_second": 866.384, "eval_steps_per_second": 13.578, "step": 508000 }, { "epoch": 5.67, "learning_rate": 8.381762026330858e-05, "loss": 0.2101, "step": 508500 }, { "epoch": 5.68, "learning_rate": 8.370204446715997e-05, "loss": 0.2098, "step": 509000 }, { "epoch": 5.68, "eval_loss": 0.2018594741821289, "eval_runtime": 2.6012, "eval_samples_per_second": 883.058, "eval_steps_per_second": 13.84, "step": 509000 }, { "epoch": 5.68, "learning_rate": 8.358645854976311e-05, "loss": 0.2101, "step": 509500 }, { "epoch": 5.69, "learning_rate": 8.347086282712556e-05, "loss": 0.2097, "step": 510000 }, { "epoch": 5.69, "eval_loss": 0.19868962466716766, "eval_runtime": 2.618, "eval_samples_per_second": 877.377, "eval_steps_per_second": 13.751, "step": 510000 }, { "epoch": 5.69, "learning_rate": 8.335525761528157e-05, "loss": 0.2097, "step": 510500 }, { "epoch": 5.7, "learning_rate": 8.323964323029136e-05, "loss": 0.2097, "step": 511000 }, { "epoch": 5.7, "eval_loss": 0.19855724275112152, "eval_runtime": 2.5751, "eval_samples_per_second": 892.001, "eval_steps_per_second": 13.98, "step": 511000 }, { "epoch": 5.7, "learning_rate": 8.312401998824027e-05, "loss": 0.2094, "step": 511500 }, { "epoch": 5.71, "learning_rate": 8.300838820523784e-05, "loss": 0.2098, "step": 512000 }, { "epoch": 5.71, "eval_loss": 0.20061442255973816, "eval_runtime": 2.5891, "eval_samples_per_second": 887.184, "eval_steps_per_second": 13.904, "step": 512000 }, { "epoch": 5.72, "learning_rate": 8.289274819741691e-05, "loss": 0.2094, "step": 512500 }, { "epoch": 5.72, "learning_rate": 8.277710028093289e-05, "loss": 0.2095, "step": 513000 }, { "epoch": 5.72, "eval_loss": 0.19978360831737518, "eval_runtime": 2.5112, "eval_samples_per_second": 914.704, "eval_steps_per_second": 14.336, "step": 513000 }, { "epoch": 5.73, "learning_rate": 8.266144477196274e-05, "loss": 0.2097, "step": 513500 }, { "epoch": 5.73, "learning_rate": 8.254578198670421e-05, "loss": 0.2096, "step": 514000 }, { "epoch": 5.73, "eval_loss": 0.19981496036052704, "eval_runtime": 2.6312, "eval_samples_per_second": 872.987, "eval_steps_per_second": 13.682, "step": 514000 }, { "epoch": 5.74, "learning_rate": 8.243011224137492e-05, "loss": 0.2091, "step": 514500 }, { "epoch": 5.74, "learning_rate": 8.231443585221157e-05, "loss": 0.2094, "step": 515000 }, { "epoch": 5.74, "eval_loss": 0.19933128356933594, "eval_runtime": 2.5527, "eval_samples_per_second": 899.838, "eval_steps_per_second": 14.103, "step": 515000 }, { "epoch": 5.75, "learning_rate": 8.219875313546898e-05, "loss": 0.2097, "step": 515500 }, { "epoch": 5.75, "learning_rate": 8.208306440741926e-05, "loss": 0.209, "step": 516000 }, { "epoch": 5.75, "eval_loss": 0.19918790459632874, "eval_runtime": 2.5243, "eval_samples_per_second": 909.957, "eval_steps_per_second": 14.261, "step": 516000 }, { "epoch": 5.76, "learning_rate": 8.196736998435101e-05, "loss": 0.2091, "step": 516500 }, { "epoch": 5.77, "learning_rate": 8.185167018256834e-05, "loss": 0.2092, "step": 517000 }, { "epoch": 5.77, "eval_loss": 0.19863919913768768, "eval_runtime": 2.6312, "eval_samples_per_second": 872.998, "eval_steps_per_second": 13.682, "step": 517000 }, { "epoch": 5.77, "learning_rate": 8.173596531839011e-05, "loss": 0.2089, "step": 517500 }, { "epoch": 5.78, "learning_rate": 8.162025570814896e-05, "loss": 0.2088, "step": 518000 }, { "epoch": 5.78, "eval_loss": 0.19913460314273834, "eval_runtime": 2.6329, "eval_samples_per_second": 872.416, "eval_steps_per_second": 13.673, "step": 518000 }, { "epoch": 5.78, "learning_rate": 8.150454166819059e-05, "loss": 0.2087, "step": 518500 }, { "epoch": 5.79, "learning_rate": 8.138882351487275e-05, "loss": 0.2086, "step": 519000 }, { "epoch": 5.79, "eval_loss": 0.20065449178218842, "eval_runtime": 2.6728, "eval_samples_per_second": 859.388, "eval_steps_per_second": 13.469, "step": 519000 }, { "epoch": 5.79, "learning_rate": 8.127310156456445e-05, "loss": 0.2088, "step": 519500 }, { "epoch": 5.8, "learning_rate": 8.115737613364511e-05, "loss": 0.2091, "step": 520000 }, { "epoch": 5.8, "eval_loss": 0.19827573001384735, "eval_runtime": 2.5025, "eval_samples_per_second": 917.889, "eval_steps_per_second": 14.386, "step": 520000 }, { "epoch": 5.81, "learning_rate": 8.104164753850357e-05, "loss": 0.2086, "step": 520500 }, { "epoch": 5.81, "learning_rate": 8.092591609553747e-05, "loss": 0.2084, "step": 521000 }, { "epoch": 5.81, "eval_loss": 0.19897180795669556, "eval_runtime": 2.6826, "eval_samples_per_second": 856.274, "eval_steps_per_second": 13.42, "step": 521000 }, { "epoch": 5.82, "learning_rate": 8.081018212115208e-05, "loss": 0.2088, "step": 521500 }, { "epoch": 5.82, "learning_rate": 8.069444593175975e-05, "loss": 0.2087, "step": 522000 }, { "epoch": 5.82, "eval_loss": 0.19715061783790588, "eval_runtime": 2.574, "eval_samples_per_second": 892.373, "eval_steps_per_second": 13.986, "step": 522000 }, { "epoch": 5.83, "learning_rate": 8.057870784377874e-05, "loss": 0.2091, "step": 522500 }, { "epoch": 5.83, "learning_rate": 8.046296817363259e-05, "loss": 0.2084, "step": 523000 }, { "epoch": 5.83, "eval_loss": 0.19694332778453827, "eval_runtime": 2.6562, "eval_samples_per_second": 864.754, "eval_steps_per_second": 13.553, "step": 523000 }, { "epoch": 5.84, "learning_rate": 8.034722723774913e-05, "loss": 0.2087, "step": 523500 }, { "epoch": 5.84, "learning_rate": 8.023148535255965e-05, "loss": 0.2085, "step": 524000 }, { "epoch": 5.84, "eval_loss": 0.19816145300865173, "eval_runtime": 2.6212, "eval_samples_per_second": 876.329, "eval_steps_per_second": 13.734, "step": 524000 }, { "epoch": 5.85, "learning_rate": 8.011574283449807e-05, "loss": 0.2083, "step": 524500 }, { "epoch": 5.86, "learning_rate": 7.999999999999999e-05, "loss": 0.2084, "step": 525000 }, { "epoch": 5.86, "eval_loss": 0.1987900733947754, "eval_runtime": 2.6058, "eval_samples_per_second": 881.485, "eval_steps_per_second": 13.815, "step": 525000 }, { "epoch": 5.86, "learning_rate": 7.98842571655019e-05, "loss": 0.2084, "step": 525500 }, { "epoch": 5.87, "learning_rate": 7.976851464744033e-05, "loss": 0.2085, "step": 526000 }, { "epoch": 5.87, "eval_loss": 0.1988108605146408, "eval_runtime": 2.5389, "eval_samples_per_second": 904.735, "eval_steps_per_second": 14.18, "step": 526000 }, { "epoch": 5.87, "learning_rate": 7.965277276225087e-05, "loss": 0.208, "step": 526500 }, { "epoch": 5.88, "learning_rate": 7.953703182636741e-05, "loss": 0.208, "step": 527000 }, { "epoch": 5.88, "eval_loss": 0.19850178062915802, "eval_runtime": 2.6174, "eval_samples_per_second": 877.592, "eval_steps_per_second": 13.754, "step": 527000 }, { "epoch": 5.88, "learning_rate": 7.942129215622125e-05, "loss": 0.2086, "step": 527500 }, { "epoch": 5.89, "learning_rate": 7.930555406824026e-05, "loss": 0.208, "step": 528000 }, { "epoch": 5.89, "eval_loss": 0.19715136289596558, "eval_runtime": 2.6066, "eval_samples_per_second": 881.233, "eval_steps_per_second": 13.811, "step": 528000 }, { "epoch": 5.89, "learning_rate": 7.91898178788479e-05, "loss": 0.2078, "step": 528500 }, { "epoch": 5.9, "learning_rate": 7.907408390446254e-05, "loss": 0.2085, "step": 529000 }, { "epoch": 5.9, "eval_loss": 0.1981622874736786, "eval_runtime": 2.6077, "eval_samples_per_second": 880.843, "eval_steps_per_second": 13.805, "step": 529000 }, { "epoch": 5.91, "learning_rate": 7.895835246149643e-05, "loss": 0.2077, "step": 529500 }, { "epoch": 5.91, "learning_rate": 7.884262386635489e-05, "loss": 0.2081, "step": 530000 }, { "epoch": 5.91, "eval_loss": 0.19613459706306458, "eval_runtime": 2.6351, "eval_samples_per_second": 871.683, "eval_steps_per_second": 13.662, "step": 530000 }, { "epoch": 5.92, "learning_rate": 7.872689843543554e-05, "loss": 0.208, "step": 530500 }, { "epoch": 5.92, "learning_rate": 7.861117648512725e-05, "loss": 0.2078, "step": 531000 }, { "epoch": 5.92, "eval_loss": 0.19699302315711975, "eval_runtime": 2.6704, "eval_samples_per_second": 860.181, "eval_steps_per_second": 13.481, "step": 531000 }, { "epoch": 5.93, "learning_rate": 7.849545833180941e-05, "loss": 0.2078, "step": 531500 }, { "epoch": 5.93, "learning_rate": 7.837974429185103e-05, "loss": 0.2084, "step": 532000 }, { "epoch": 5.93, "eval_loss": 0.19729405641555786, "eval_runtime": 2.4788, "eval_samples_per_second": 926.65, "eval_steps_per_second": 14.523, "step": 532000 }, { "epoch": 5.94, "learning_rate": 7.82640346816099e-05, "loss": 0.2078, "step": 532500 }, { "epoch": 5.94, "learning_rate": 7.814832981743164e-05, "loss": 0.2075, "step": 533000 }, { "epoch": 5.94, "eval_loss": 0.1986730396747589, "eval_runtime": 2.5108, "eval_samples_per_second": 914.858, "eval_steps_per_second": 14.338, "step": 533000 }, { "epoch": 5.95, "learning_rate": 7.803263001564899e-05, "loss": 0.2079, "step": 533500 }, { "epoch": 5.96, "learning_rate": 7.791693559258072e-05, "loss": 0.2073, "step": 534000 }, { "epoch": 5.96, "eval_loss": 0.19755910336971283, "eval_runtime": 2.4538, "eval_samples_per_second": 936.101, "eval_steps_per_second": 14.671, "step": 534000 }, { "epoch": 5.96, "learning_rate": 7.780124686453101e-05, "loss": 0.208, "step": 534500 }, { "epoch": 5.97, "learning_rate": 7.768556414778842e-05, "loss": 0.2076, "step": 535000 }, { "epoch": 5.97, "eval_loss": 0.19874924421310425, "eval_runtime": 2.4792, "eval_samples_per_second": 926.525, "eval_steps_per_second": 14.521, "step": 535000 }, { "epoch": 5.97, "learning_rate": 7.756988775862508e-05, "loss": 0.2076, "step": 535500 }, { "epoch": 5.98, "learning_rate": 7.74542180132958e-05, "loss": 0.2076, "step": 536000 }, { "epoch": 5.98, "eval_loss": 0.1981775462627411, "eval_runtime": 2.5707, "eval_samples_per_second": 893.538, "eval_steps_per_second": 14.004, "step": 536000 }, { "epoch": 5.98, "learning_rate": 7.733855522803725e-05, "loss": 0.2075, "step": 536500 }, { "epoch": 5.99, "learning_rate": 7.72228997190671e-05, "loss": 0.2074, "step": 537000 }, { "epoch": 5.99, "eval_loss": 0.19704852998256683, "eval_runtime": 2.4868, "eval_samples_per_second": 923.663, "eval_steps_per_second": 14.476, "step": 537000 }, { "epoch": 5.99, "learning_rate": 7.710725180258306e-05, "loss": 0.2077, "step": 537500 }, { "epoch": 6.0, "learning_rate": 7.699161179476217e-05, "loss": 0.2073, "step": 538000 }, { "epoch": 6.0, "eval_loss": 0.19944582879543304, "eval_runtime": 2.6063, "eval_samples_per_second": 881.323, "eval_steps_per_second": 13.813, "step": 538000 }, { "epoch": 6.01, "learning_rate": 7.687598001175972e-05, "loss": 0.2075, "step": 538500 }, { "epoch": 6.01, "learning_rate": 7.676035676970863e-05, "loss": 0.207, "step": 539000 }, { "epoch": 6.01, "eval_loss": 0.19753174483776093, "eval_runtime": 2.6631, "eval_samples_per_second": 862.522, "eval_steps_per_second": 13.518, "step": 539000 }, { "epoch": 6.02, "learning_rate": 7.664474238471844e-05, "loss": 0.2071, "step": 539500 }, { "epoch": 6.02, "learning_rate": 7.652913717287443e-05, "loss": 0.2069, "step": 540000 }, { "epoch": 6.02, "eval_loss": 0.19679389894008636, "eval_runtime": 2.5175, "eval_samples_per_second": 912.43, "eval_steps_per_second": 14.3, "step": 540000 }, { "epoch": 6.03, "learning_rate": 7.641354145023687e-05, "loss": 0.2072, "step": 540500 }, { "epoch": 6.03, "learning_rate": 7.629795553284005e-05, "loss": 0.2071, "step": 541000 }, { "epoch": 6.03, "eval_loss": 0.19721636176109314, "eval_runtime": 2.791, "eval_samples_per_second": 822.988, "eval_steps_per_second": 12.898, "step": 541000 }, { "epoch": 6.04, "learning_rate": 7.61823797366914e-05, "loss": 0.2071, "step": 541500 }, { "epoch": 6.04, "learning_rate": 7.606681437777081e-05, "loss": 0.2067, "step": 542000 }, { "epoch": 6.04, "eval_loss": 0.19431854784488678, "eval_runtime": 2.5424, "eval_samples_per_second": 903.464, "eval_steps_per_second": 14.16, "step": 542000 }, { "epoch": 6.05, "learning_rate": 7.595125977202952e-05, "loss": 0.207, "step": 542500 }, { "epoch": 6.06, "learning_rate": 7.583571623538939e-05, "loss": 0.2073, "step": 543000 }, { "epoch": 6.06, "eval_loss": 0.1961280107498169, "eval_runtime": 2.5047, "eval_samples_per_second": 917.065, "eval_steps_per_second": 14.373, "step": 543000 }, { "epoch": 6.06, "learning_rate": 7.572018408374208e-05, "loss": 0.2069, "step": 543500 }, { "epoch": 6.07, "learning_rate": 7.560466363294806e-05, "loss": 0.2068, "step": 544000 }, { "epoch": 6.07, "eval_loss": 0.19669890403747559, "eval_runtime": 2.4907, "eval_samples_per_second": 922.238, "eval_steps_per_second": 14.454, "step": 544000 }, { "epoch": 6.07, "learning_rate": 7.548915519883582e-05, "loss": 0.2066, "step": 544500 }, { "epoch": 6.08, "learning_rate": 7.537365909720104e-05, "loss": 0.2065, "step": 545000 }, { "epoch": 6.08, "eval_loss": 0.1973814219236374, "eval_runtime": 2.6437, "eval_samples_per_second": 868.843, "eval_steps_per_second": 13.617, "step": 545000 }, { "epoch": 6.08, "learning_rate": 7.525817564380562e-05, "loss": 0.2068, "step": 545500 }, { "epoch": 6.09, "learning_rate": 7.514270515437691e-05, "loss": 0.2062, "step": 546000 }, { "epoch": 6.09, "eval_loss": 0.19645407795906067, "eval_runtime": 2.6493, "eval_samples_per_second": 867.035, "eval_steps_per_second": 13.589, "step": 546000 }, { "epoch": 6.1, "learning_rate": 7.502724794460685e-05, "loss": 0.2066, "step": 546500 }, { "epoch": 6.1, "learning_rate": 7.491180433015101e-05, "loss": 0.2062, "step": 547000 }, { "epoch": 6.1, "eval_loss": 0.19736704230308533, "eval_runtime": 2.5133, "eval_samples_per_second": 913.953, "eval_steps_per_second": 14.324, "step": 547000 }, { "epoch": 6.11, "learning_rate": 7.479637462662786e-05, "loss": 0.2065, "step": 547500 }, { "epoch": 6.11, "learning_rate": 7.468095914961777e-05, "loss": 0.2063, "step": 548000 }, { "epoch": 6.11, "eval_loss": 0.19768109917640686, "eval_runtime": 2.5796, "eval_samples_per_second": 890.452, "eval_steps_per_second": 13.956, "step": 548000 }, { "epoch": 6.12, "learning_rate": 7.456555821466225e-05, "loss": 0.2063, "step": 548500 }, { "epoch": 6.12, "learning_rate": 7.445017213726307e-05, "loss": 0.2068, "step": 549000 }, { "epoch": 6.12, "eval_loss": 0.19542047381401062, "eval_runtime": 2.4958, "eval_samples_per_second": 920.364, "eval_steps_per_second": 14.425, "step": 549000 }, { "epoch": 6.13, "learning_rate": 7.433480123288138e-05, "loss": 0.2065, "step": 549500 }, { "epoch": 6.13, "learning_rate": 7.421944581693674e-05, "loss": 0.2062, "step": 550000 }, { "epoch": 6.13, "eval_loss": 0.19721828401088715, "eval_runtime": 2.5551, "eval_samples_per_second": 898.971, "eval_steps_per_second": 14.089, "step": 550000 }, { "epoch": 6.14, "learning_rate": 7.410410620480651e-05, "loss": 0.2065, "step": 550500 }, { "epoch": 6.15, "learning_rate": 7.39887827118248e-05, "loss": 0.2063, "step": 551000 }, { "epoch": 6.15, "eval_loss": 0.1966421753168106, "eval_runtime": 2.4465, "eval_samples_per_second": 938.899, "eval_steps_per_second": 14.715, "step": 551000 }, { "epoch": 6.15, "learning_rate": 7.38734756532816e-05, "loss": 0.2062, "step": 551500 }, { "epoch": 6.16, "learning_rate": 7.375818534442207e-05, "loss": 0.2063, "step": 552000 }, { "epoch": 6.16, "eval_loss": 0.19533967971801758, "eval_runtime": 2.4902, "eval_samples_per_second": 922.429, "eval_steps_per_second": 14.457, "step": 552000 }, { "epoch": 6.16, "learning_rate": 7.364291210044542e-05, "loss": 0.2058, "step": 552500 }, { "epoch": 6.17, "learning_rate": 7.352765623650435e-05, "loss": 0.2061, "step": 553000 }, { "epoch": 6.17, "eval_loss": 0.1968429684638977, "eval_runtime": 2.5279, "eval_samples_per_second": 908.659, "eval_steps_per_second": 14.241, "step": 553000 }, { "epoch": 6.17, "learning_rate": 7.341241806770399e-05, "loss": 0.2064, "step": 553500 }, { "epoch": 6.18, "learning_rate": 7.329719790910108e-05, "loss": 0.2056, "step": 554000 }, { "epoch": 6.18, "eval_loss": 0.19725964963436127, "eval_runtime": 2.5134, "eval_samples_per_second": 913.905, "eval_steps_per_second": 14.323, "step": 554000 }, { "epoch": 6.18, "learning_rate": 7.318199607570318e-05, "loss": 0.2057, "step": 554500 }, { "epoch": 6.19, "learning_rate": 7.30668128824676e-05, "loss": 0.2061, "step": 555000 }, { "epoch": 6.19, "eval_loss": 0.19744634628295898, "eval_runtime": 2.5342, "eval_samples_per_second": 906.409, "eval_steps_per_second": 14.206, "step": 555000 }, { "epoch": 6.2, "learning_rate": 7.295164864430088e-05, "loss": 0.2056, "step": 555500 }, { "epoch": 6.2, "learning_rate": 7.283650367605764e-05, "loss": 0.2062, "step": 556000 }, { "epoch": 6.2, "eval_loss": 0.19634658098220825, "eval_runtime": 2.4766, "eval_samples_per_second": 927.495, "eval_steps_per_second": 14.536, "step": 556000 }, { "epoch": 6.21, "learning_rate": 7.272137829253983e-05, "loss": 0.2059, "step": 556500 }, { "epoch": 6.21, "learning_rate": 7.260627280849581e-05, "loss": 0.2061, "step": 557000 }, { "epoch": 6.21, "eval_loss": 0.19559474289417267, "eval_runtime": 2.4504, "eval_samples_per_second": 937.415, "eval_steps_per_second": 14.692, "step": 557000 }, { "epoch": 6.22, "learning_rate": 7.249118753861958e-05, "loss": 0.2056, "step": 557500 }, { "epoch": 6.22, "learning_rate": 7.23761227975499e-05, "loss": 0.2055, "step": 558000 }, { "epoch": 6.22, "eval_loss": 0.1950286328792572, "eval_runtime": 2.5356, "eval_samples_per_second": 905.898, "eval_steps_per_second": 14.198, "step": 558000 }, { "epoch": 6.23, "learning_rate": 7.22610788998694e-05, "loss": 0.2056, "step": 558500 }, { "epoch": 6.23, "learning_rate": 7.21460561601037e-05, "loss": 0.2055, "step": 559000 }, { "epoch": 6.23, "eval_loss": 0.19409525394439697, "eval_runtime": 2.5975, "eval_samples_per_second": 884.304, "eval_steps_per_second": 13.859, "step": 559000 }, { "epoch": 6.24, "learning_rate": 7.203105489272053e-05, "loss": 0.2055, "step": 559500 }, { "epoch": 6.25, "learning_rate": 7.191607541212897e-05, "loss": 0.2057, "step": 560000 }, { "epoch": 6.25, "eval_loss": 0.19587305188179016, "eval_runtime": 2.6038, "eval_samples_per_second": 882.169, "eval_steps_per_second": 13.826, "step": 560000 }, { "epoch": 6.25, "learning_rate": 7.180111803267856e-05, "loss": 0.2054, "step": 560500 }, { "epoch": 6.26, "learning_rate": 7.168618306865838e-05, "loss": 0.2051, "step": 561000 }, { "epoch": 6.26, "eval_loss": 0.195304736495018, "eval_runtime": 2.4332, "eval_samples_per_second": 944.014, "eval_steps_per_second": 14.795, "step": 561000 }, { "epoch": 6.26, "learning_rate": 7.157127083429626e-05, "loss": 0.2053, "step": 561500 }, { "epoch": 6.27, "learning_rate": 7.145638164375779e-05, "loss": 0.205, "step": 562000 }, { "epoch": 6.27, "eval_loss": 0.1958540827035904, "eval_runtime": 2.6408, "eval_samples_per_second": 869.814, "eval_steps_per_second": 13.632, "step": 562000 }, { "epoch": 6.27, "learning_rate": 7.134151581114565e-05, "loss": 0.2053, "step": 562500 }, { "epoch": 6.28, "learning_rate": 7.122667365049869e-05, "loss": 0.2052, "step": 563000 }, { "epoch": 6.28, "eval_loss": 0.19526307284832, "eval_runtime": 2.6193, "eval_samples_per_second": 876.966, "eval_steps_per_second": 13.744, "step": 563000 }, { "epoch": 6.28, "learning_rate": 7.111185547579099e-05, "loss": 0.205, "step": 563500 }, { "epoch": 6.29, "learning_rate": 7.099706160093098e-05, "loss": 0.2051, "step": 564000 }, { "epoch": 6.29, "eval_loss": 0.1962643265724182, "eval_runtime": 2.4959, "eval_samples_per_second": 920.299, "eval_steps_per_second": 14.423, "step": 564000 }, { "epoch": 6.3, "learning_rate": 7.08822923397608e-05, "loss": 0.2054, "step": 564500 }, { "epoch": 6.3, "learning_rate": 7.076754800605516e-05, "loss": 0.2053, "step": 565000 }, { "epoch": 6.3, "eval_loss": 0.19500210881233215, "eval_runtime": 2.6355, "eval_samples_per_second": 871.546, "eval_steps_per_second": 13.659, "step": 565000 }, { "epoch": 6.31, "learning_rate": 7.065282891352078e-05, "loss": 0.2049, "step": 565500 }, { "epoch": 6.31, "learning_rate": 7.053813537579523e-05, "loss": 0.2052, "step": 566000 }, { "epoch": 6.31, "eval_loss": 0.1964665800333023, "eval_runtime": 2.6178, "eval_samples_per_second": 877.444, "eval_steps_per_second": 13.752, "step": 566000 }, { "epoch": 6.32, "learning_rate": 7.042346770644624e-05, "loss": 0.2046, "step": 566500 }, { "epoch": 6.32, "learning_rate": 7.030882621897088e-05, "loss": 0.2046, "step": 567000 }, { "epoch": 6.32, "eval_loss": 0.19378143548965454, "eval_runtime": 2.6471, "eval_samples_per_second": 867.729, "eval_steps_per_second": 13.6, "step": 567000 }, { "epoch": 6.33, "learning_rate": 7.019421122679455e-05, "loss": 0.2052, "step": 567500 }, { "epoch": 6.33, "learning_rate": 7.00796230432703e-05, "loss": 0.2045, "step": 568000 }, { "epoch": 6.33, "eval_loss": 0.1938391774892807, "eval_runtime": 2.5793, "eval_samples_per_second": 890.552, "eval_steps_per_second": 13.957, "step": 568000 }, { "epoch": 6.34, "learning_rate": 6.996506198167789e-05, "loss": 0.2046, "step": 568500 }, { "epoch": 6.35, "learning_rate": 6.985052835522279e-05, "loss": 0.2045, "step": 569000 }, { "epoch": 6.35, "eval_loss": 0.19408397376537323, "eval_runtime": 2.5021, "eval_samples_per_second": 918.029, "eval_steps_per_second": 14.388, "step": 569000 }, { "epoch": 6.35, "learning_rate": 6.973602247703561e-05, "loss": 0.2047, "step": 569500 }, { "epoch": 6.36, "learning_rate": 6.962154466017105e-05, "loss": 0.2047, "step": 570000 }, { "epoch": 6.36, "eval_loss": 0.19305509328842163, "eval_runtime": 2.5881, "eval_samples_per_second": 887.536, "eval_steps_per_second": 13.91, "step": 570000 }, { "epoch": 6.01, "learning_rate": 6.950709521760712e-05, "loss": 0.2048, "step": 570500 }, { "epoch": 6.01, "learning_rate": 6.939267446224418e-05, "loss": 0.2046, "step": 571000 }, { "epoch": 6.01, "eval_loss": 0.19425606727600098, "eval_runtime": 2.5589, "eval_samples_per_second": 897.645, "eval_steps_per_second": 14.068, "step": 571000 }, { "epoch": 6.02, "learning_rate": 6.927828270690422e-05, "loss": 0.2047, "step": 571500 }, { "epoch": 6.02, "learning_rate": 6.91639202643299e-05, "loss": 0.2042, "step": 572000 }, { "epoch": 6.02, "eval_loss": 0.19538278877735138, "eval_runtime": 2.5656, "eval_samples_per_second": 895.308, "eval_steps_per_second": 14.032, "step": 572000 }, { "epoch": 6.03, "learning_rate": 6.904958744718383e-05, "loss": 0.2046, "step": 572500 }, { "epoch": 6.03, "learning_rate": 6.893528456804756e-05, "loss": 0.2042, "step": 573000 }, { "epoch": 6.03, "eval_loss": 0.19409753382205963, "eval_runtime": 2.5486, "eval_samples_per_second": 901.27, "eval_steps_per_second": 14.125, "step": 573000 }, { "epoch": 6.04, "learning_rate": 6.882101193942075e-05, "loss": 0.2042, "step": 573500 }, { "epoch": 6.04, "learning_rate": 6.870676987372044e-05, "loss": 0.2041, "step": 574000 }, { "epoch": 6.04, "eval_loss": 0.1939525604248047, "eval_runtime": 2.5437, "eval_samples_per_second": 903.03, "eval_steps_per_second": 14.153, "step": 574000 }, { "epoch": 6.05, "learning_rate": 6.859255868328003e-05, "loss": 0.2039, "step": 574500 }, { "epoch": 6.06, "learning_rate": 6.847837868034861e-05, "loss": 0.2042, "step": 575000 }, { "epoch": 6.06, "eval_loss": 0.1951504349708557, "eval_runtime": 2.5798, "eval_samples_per_second": 890.377, "eval_steps_per_second": 13.955, "step": 575000 }, { "epoch": 6.06, "learning_rate": 6.836423017708996e-05, "loss": 0.2038, "step": 575500 }, { "epoch": 6.07, "learning_rate": 6.825011348558167e-05, "loss": 0.204, "step": 576000 }, { "epoch": 6.07, "eval_loss": 0.19505272805690765, "eval_runtime": 2.5475, "eval_samples_per_second": 901.67, "eval_steps_per_second": 14.132, "step": 576000 }, { "epoch": 6.07, "learning_rate": 6.813602891781443e-05, "loss": 0.2039, "step": 576500 }, { "epoch": 6.08, "learning_rate": 6.802197678569109e-05, "loss": 0.2038, "step": 577000 }, { "epoch": 6.08, "eval_loss": 0.19440634548664093, "eval_runtime": 2.537, "eval_samples_per_second": 905.395, "eval_steps_per_second": 14.19, "step": 577000 }, { "epoch": 6.08, "learning_rate": 6.790795740102589e-05, "loss": 0.2038, "step": 577500 }, { "epoch": 6.09, "learning_rate": 6.779397107554339e-05, "loss": 0.2038, "step": 578000 }, { "epoch": 6.09, "eval_loss": 0.19268804788589478, "eval_runtime": 2.5143, "eval_samples_per_second": 913.559, "eval_steps_per_second": 14.318, "step": 578000 }, { "epoch": 6.09, "learning_rate": 6.768001812087789e-05, "loss": 0.2038, "step": 578500 }, { "epoch": 6.1, "learning_rate": 6.756609884857239e-05, "loss": 0.2037, "step": 579000 }, { "epoch": 6.1, "eval_loss": 0.19323283433914185, "eval_runtime": 2.5075, "eval_samples_per_second": 916.04, "eval_steps_per_second": 14.357, "step": 579000 }, { "epoch": 6.11, "learning_rate": 6.745221357007786e-05, "loss": 0.2037, "step": 579500 }, { "epoch": 6.11, "learning_rate": 6.733836259675233e-05, "loss": 0.2036, "step": 580000 }, { "epoch": 6.11, "eval_loss": 0.19253070652484894, "eval_runtime": 2.4507, "eval_samples_per_second": 937.277, "eval_steps_per_second": 14.69, "step": 580000 }, { "epoch": 6.12, "learning_rate": 6.722454623985994e-05, "loss": 0.2036, "step": 580500 }, { "epoch": 6.12, "learning_rate": 6.71107648105703e-05, "loss": 0.2038, "step": 581000 }, { "epoch": 6.12, "eval_loss": 0.1942874640226364, "eval_runtime": 2.6604, "eval_samples_per_second": 863.404, "eval_steps_per_second": 13.532, "step": 581000 }, { "epoch": 6.13, "learning_rate": 6.69970186199575e-05, "loss": 0.204, "step": 581500 }, { "epoch": 6.13, "learning_rate": 6.688330797899925e-05, "loss": 0.2036, "step": 582000 }, { "epoch": 6.13, "eval_loss": 0.1936686784029007, "eval_runtime": 2.5783, "eval_samples_per_second": 890.887, "eval_steps_per_second": 13.963, "step": 582000 }, { "epoch": 6.14, "learning_rate": 6.676963319857618e-05, "loss": 0.2038, "step": 582500 }, { "epoch": 6.14, "learning_rate": 6.665599458947072e-05, "loss": 0.2035, "step": 583000 }, { "epoch": 6.14, "eval_loss": 0.1947861611843109, "eval_runtime": 2.5506, "eval_samples_per_second": 900.557, "eval_steps_per_second": 14.114, "step": 583000 }, { "epoch": 6.15, "learning_rate": 6.654239246236651e-05, "loss": 0.2037, "step": 583500 }, { "epoch": 6.16, "learning_rate": 6.642882712784742e-05, "loss": 0.2033, "step": 584000 }, { "epoch": 6.16, "eval_loss": 0.1927429735660553, "eval_runtime": 2.5138, "eval_samples_per_second": 913.772, "eval_steps_per_second": 14.321, "step": 584000 }, { "epoch": 6.16, "learning_rate": 6.631529889639679e-05, "loss": 0.2032, "step": 584500 }, { "epoch": 6.17, "learning_rate": 6.620180807839639e-05, "loss": 0.2033, "step": 585000 }, { "epoch": 6.17, "eval_loss": 0.19153708219528198, "eval_runtime": 2.5442, "eval_samples_per_second": 902.822, "eval_steps_per_second": 14.15, "step": 585000 }, { "epoch": 6.17, "learning_rate": 6.608835498412583e-05, "loss": 0.2034, "step": 585500 }, { "epoch": 6.18, "learning_rate": 6.597493992376152e-05, "loss": 0.2031, "step": 586000 }, { "epoch": 6.18, "eval_loss": 0.1941889226436615, "eval_runtime": 2.5658, "eval_samples_per_second": 895.227, "eval_steps_per_second": 14.031, "step": 586000 }, { "epoch": 6.18, "learning_rate": 6.586156320737592e-05, "loss": 0.2032, "step": 586500 }, { "epoch": 6.19, "learning_rate": 6.574822514493664e-05, "loss": 0.2029, "step": 587000 }, { "epoch": 6.19, "eval_loss": 0.19126850366592407, "eval_runtime": 2.5788, "eval_samples_per_second": 890.72, "eval_steps_per_second": 13.96, "step": 587000 }, { "epoch": 6.2, "learning_rate": 6.56349260463056e-05, "loss": 0.2026, "step": 587500 }, { "epoch": 6.2, "learning_rate": 6.552166622123824e-05, "loss": 0.2031, "step": 588000 }, { "epoch": 6.2, "eval_loss": 0.19138653576374054, "eval_runtime": 2.5292, "eval_samples_per_second": 908.193, "eval_steps_per_second": 14.234, "step": 588000 }, { "epoch": 6.21, "learning_rate": 6.540844597938256e-05, "loss": 0.203, "step": 588500 }, { "epoch": 6.21, "learning_rate": 6.52952656302784e-05, "loss": 0.203, "step": 589000 }, { "epoch": 6.21, "eval_loss": 0.1942255049943924, "eval_runtime": 2.5063, "eval_samples_per_second": 916.493, "eval_steps_per_second": 14.364, "step": 589000 }, { "epoch": 6.22, "learning_rate": 6.518212548335651e-05, "loss": 0.2027, "step": 589500 }, { "epoch": 6.22, "learning_rate": 6.506902584793773e-05, "loss": 0.2032, "step": 590000 }, { "epoch": 6.22, "eval_loss": 0.1933428943157196, "eval_runtime": 2.5059, "eval_samples_per_second": 916.62, "eval_steps_per_second": 14.366, "step": 590000 }, { "epoch": 6.23, "learning_rate": 6.495596703323214e-05, "loss": 0.203, "step": 590500 }, { "epoch": 6.23, "learning_rate": 6.484294934833822e-05, "loss": 0.203, "step": 591000 }, { "epoch": 6.23, "eval_loss": 0.19498485326766968, "eval_runtime": 2.4612, "eval_samples_per_second": 933.27, "eval_steps_per_second": 14.627, "step": 591000 }, { "epoch": 6.24, "learning_rate": 6.472997310224204e-05, "loss": 0.2028, "step": 591500 }, { "epoch": 6.25, "learning_rate": 6.461703860381628e-05, "loss": 0.2029, "step": 592000 }, { "epoch": 6.25, "eval_loss": 0.19485318660736084, "eval_runtime": 2.5316, "eval_samples_per_second": 907.333, "eval_steps_per_second": 14.22, "step": 592000 }, { "epoch": 6.25, "learning_rate": 6.450414616181959e-05, "loss": 0.2027, "step": 592500 }, { "epoch": 6.26, "learning_rate": 6.439129608489559e-05, "loss": 0.2023, "step": 593000 }, { "epoch": 6.26, "eval_loss": 0.19170017540454865, "eval_runtime": 2.5465, "eval_samples_per_second": 902.036, "eval_steps_per_second": 14.137, "step": 593000 }, { "epoch": 6.26, "learning_rate": 6.427848868157208e-05, "loss": 0.2021, "step": 593500 }, { "epoch": 6.27, "learning_rate": 6.41657242602602e-05, "loss": 0.2024, "step": 594000 }, { "epoch": 6.27, "eval_loss": 0.19145548343658447, "eval_runtime": 2.5956, "eval_samples_per_second": 884.955, "eval_steps_per_second": 13.87, "step": 594000 }, { "epoch": 6.27, "learning_rate": 6.405300312925353e-05, "loss": 0.2026, "step": 594500 }, { "epoch": 6.28, "learning_rate": 6.39403255967274e-05, "loss": 0.2021, "step": 595000 }, { "epoch": 6.28, "eval_loss": 0.19004245102405548, "eval_runtime": 2.516, "eval_samples_per_second": 912.956, "eval_steps_per_second": 14.308, "step": 595000 }, { "epoch": 6.28, "learning_rate": 6.382769197073783e-05, "loss": 0.2023, "step": 595500 }, { "epoch": 6.29, "learning_rate": 6.371510255922088e-05, "loss": 0.2024, "step": 596000 }, { "epoch": 6.29, "eval_loss": 0.19270072877407074, "eval_runtime": 2.4967, "eval_samples_per_second": 920.021, "eval_steps_per_second": 14.419, "step": 596000 }, { "epoch": 6.3, "learning_rate": 6.360255766999172e-05, "loss": 0.2025, "step": 596500 }, { "epoch": 6.3, "learning_rate": 6.349005761074372e-05, "loss": 0.2027, "step": 597000 }, { "epoch": 6.3, "eval_loss": 0.18951117992401123, "eval_runtime": 2.5319, "eval_samples_per_second": 907.212, "eval_steps_per_second": 14.218, "step": 597000 }, { "epoch": 6.31, "learning_rate": 6.33776026890478e-05, "loss": 0.2019, "step": 597500 }, { "epoch": 6.31, "learning_rate": 6.326519321235139e-05, "loss": 0.2025, "step": 598000 }, { "epoch": 6.31, "eval_loss": 0.19144007563591003, "eval_runtime": 2.4829, "eval_samples_per_second": 925.137, "eval_steps_per_second": 14.499, "step": 598000 }, { "epoch": 6.32, "learning_rate": 6.315282948797776e-05, "loss": 0.2022, "step": 598500 }, { "epoch": 6.32, "learning_rate": 6.304051182312496e-05, "loss": 0.2021, "step": 599000 }, { "epoch": 6.32, "eval_loss": 0.18959102034568787, "eval_runtime": 2.5681, "eval_samples_per_second": 894.45, "eval_steps_per_second": 14.018, "step": 599000 }, { "epoch": 6.33, "learning_rate": 6.292824052486525e-05, "loss": 0.2023, "step": 599500 }, { "epoch": 6.33, "learning_rate": 6.281601590014407e-05, "loss": 0.2018, "step": 600000 }, { "epoch": 6.33, "eval_loss": 0.1930903047323227, "eval_runtime": 2.4861, "eval_samples_per_second": 923.926, "eval_steps_per_second": 14.48, "step": 600000 }, { "epoch": 6.34, "learning_rate": 6.270383825577923e-05, "loss": 0.2017, "step": 600500 }, { "epoch": 6.35, "learning_rate": 6.259170789846017e-05, "loss": 0.2019, "step": 601000 }, { "epoch": 6.35, "eval_loss": 0.1939515918493271, "eval_runtime": 2.5332, "eval_samples_per_second": 906.761, "eval_steps_per_second": 14.211, "step": 601000 }, { "epoch": 6.35, "learning_rate": 6.247962513474697e-05, "loss": 0.2021, "step": 601500 }, { "epoch": 6.36, "learning_rate": 6.236759027106965e-05, "loss": 0.2019, "step": 602000 }, { "epoch": 6.36, "eval_loss": 0.192647323012352, "eval_runtime": 2.5565, "eval_samples_per_second": 898.498, "eval_steps_per_second": 14.082, "step": 602000 }, { "epoch": 6.36, "learning_rate": 6.225560361372722e-05, "loss": 0.202, "step": 602500 }, { "epoch": 6.37, "learning_rate": 6.214366546888694e-05, "loss": 0.2014, "step": 603000 }, { "epoch": 6.37, "eval_loss": 0.19305016100406647, "eval_runtime": 2.5415, "eval_samples_per_second": 903.802, "eval_steps_per_second": 14.165, "step": 603000 }, { "epoch": 6.37, "learning_rate": 6.203177614258345e-05, "loss": 0.2016, "step": 603500 }, { "epoch": 6.38, "learning_rate": 6.191993594071785e-05, "loss": 0.2021, "step": 604000 }, { "epoch": 6.38, "eval_loss": 0.19003823399543762, "eval_runtime": 2.6362, "eval_samples_per_second": 871.343, "eval_steps_per_second": 13.656, "step": 604000 }, { "epoch": 6.38, "learning_rate": 6.180814516905701e-05, "loss": 0.2019, "step": 604500 }, { "epoch": 6.39, "learning_rate": 6.169640413323262e-05, "loss": 0.2016, "step": 605000 }, { "epoch": 6.39, "eval_loss": 0.19246533513069153, "eval_runtime": 2.5718, "eval_samples_per_second": 893.139, "eval_steps_per_second": 13.998, "step": 605000 }, { "epoch": 6.4, "learning_rate": 6.158471313874041e-05, "loss": 0.2017, "step": 605500 }, { "epoch": 6.4, "learning_rate": 6.147307249093929e-05, "loss": 0.2016, "step": 606000 }, { "epoch": 6.4, "eval_loss": 0.19287154078483582, "eval_runtime": 2.4757, "eval_samples_per_second": 927.823, "eval_steps_per_second": 14.541, "step": 606000 }, { "epoch": 6.41, "learning_rate": 6.136148249505053e-05, "loss": 0.2016, "step": 606500 }, { "epoch": 6.41, "learning_rate": 6.124994345615693e-05, "loss": 0.2018, "step": 607000 }, { "epoch": 6.41, "eval_loss": 0.19156165421009064, "eval_runtime": 2.5771, "eval_samples_per_second": 891.317, "eval_steps_per_second": 13.969, "step": 607000 }, { "epoch": 6.42, "learning_rate": 6.113845567920194e-05, "loss": 0.201, "step": 607500 }, { "epoch": 6.42, "learning_rate": 6.102701946898891e-05, "loss": 0.2013, "step": 608000 }, { "epoch": 6.42, "eval_loss": 0.19082282483577728, "eval_runtime": 2.6476, "eval_samples_per_second": 867.578, "eval_steps_per_second": 13.597, "step": 608000 }, { "epoch": 6.43, "learning_rate": 6.0915635130180154e-05, "loss": 0.2013, "step": 608500 }, { "epoch": 6.43, "learning_rate": 6.0804302967296225e-05, "loss": 0.2019, "step": 609000 }, { "epoch": 6.43, "eval_loss": 0.19137336313724518, "eval_runtime": 2.6058, "eval_samples_per_second": 881.501, "eval_steps_per_second": 13.815, "step": 609000 }, { "epoch": 6.44, "learning_rate": 6.0693023284715e-05, "loss": 0.2014, "step": 609500 }, { "epoch": 6.45, "learning_rate": 6.058179638667089e-05, "loss": 0.201, "step": 610000 }, { "epoch": 6.45, "eval_loss": 0.19182415306568146, "eval_runtime": 2.5256, "eval_samples_per_second": 909.494, "eval_steps_per_second": 14.254, "step": 610000 }, { "epoch": 6.45, "learning_rate": 6.047062257725395e-05, "loss": 0.2011, "step": 610500 }, { "epoch": 6.46, "learning_rate": 6.035950216040917e-05, "loss": 0.2014, "step": 611000 }, { "epoch": 6.46, "eval_loss": 0.19371135532855988, "eval_runtime": 2.5383, "eval_samples_per_second": 904.933, "eval_steps_per_second": 14.183, "step": 611000 }, { "epoch": 6.46, "learning_rate": 6.0248435439935516e-05, "loss": 0.2018, "step": 611500 }, { "epoch": 6.47, "learning_rate": 6.0137422719485145e-05, "loss": 0.2009, "step": 612000 }, { "epoch": 6.47, "eval_loss": 0.18979497253894806, "eval_runtime": 2.61, "eval_samples_per_second": 880.076, "eval_steps_per_second": 13.793, "step": 612000 }, { "epoch": 6.47, "learning_rate": 6.0026464302562636e-05, "loss": 0.2009, "step": 612500 }, { "epoch": 6.48, "learning_rate": 5.991556049252401e-05, "loss": 0.2012, "step": 613000 }, { "epoch": 6.48, "eval_loss": 0.19082209467887878, "eval_runtime": 2.5296, "eval_samples_per_second": 908.057, "eval_steps_per_second": 14.232, "step": 613000 }, { "epoch": 6.49, "learning_rate": 5.980471159257609e-05, "loss": 0.2015, "step": 613500 }, { "epoch": 6.49, "learning_rate": 5.969391790577551e-05, "loss": 0.201, "step": 614000 }, { "epoch": 6.49, "eval_loss": 0.19177234172821045, "eval_runtime": 2.4578, "eval_samples_per_second": 934.56, "eval_steps_per_second": 14.647, "step": 614000 }, { "epoch": 6.5, "learning_rate": 5.958317973502798e-05, "loss": 0.2009, "step": 614500 }, { "epoch": 6.5, "learning_rate": 5.947249738308747e-05, "loss": 0.2009, "step": 615000 }, { "epoch": 6.5, "eval_loss": 0.18852779269218445, "eval_runtime": 2.5225, "eval_samples_per_second": 910.6, "eval_steps_per_second": 14.271, "step": 615000 }, { "epoch": 6.51, "learning_rate": 5.9361871152555254e-05, "loss": 0.2008, "step": 615500 }, { "epoch": 6.51, "learning_rate": 5.925130134587924e-05, "loss": 0.2007, "step": 616000 }, { "epoch": 6.51, "eval_loss": 0.18964450061321259, "eval_runtime": 2.6115, "eval_samples_per_second": 879.56, "eval_steps_per_second": 13.785, "step": 616000 }, { "epoch": 6.52, "learning_rate": 5.914078826535307e-05, "loss": 0.2009, "step": 616500 }, { "epoch": 6.52, "learning_rate": 5.903033221311528e-05, "loss": 0.2005, "step": 617000 }, { "epoch": 6.52, "eval_loss": 0.19303825497627258, "eval_runtime": 2.4819, "eval_samples_per_second": 925.494, "eval_steps_per_second": 14.505, "step": 617000 }, { "epoch": 6.53, "learning_rate": 5.891993349114847e-05, "loss": 0.201, "step": 617500 }, { "epoch": 6.54, "learning_rate": 5.880959240127858e-05, "loss": 0.2008, "step": 618000 }, { "epoch": 6.54, "eval_loss": 0.1906815767288208, "eval_runtime": 2.5316, "eval_samples_per_second": 907.334, "eval_steps_per_second": 14.22, "step": 618000 }, { "epoch": 6.54, "learning_rate": 5.86993092451739e-05, "loss": 0.2005, "step": 618500 }, { "epoch": 6.55, "learning_rate": 5.858908432434438e-05, "loss": 0.2002, "step": 619000 }, { "epoch": 6.55, "eval_loss": 0.18906356394290924, "eval_runtime": 2.5512, "eval_samples_per_second": 900.346, "eval_steps_per_second": 14.111, "step": 619000 }, { "epoch": 6.55, "learning_rate": 5.847891794014074e-05, "loss": 0.2007, "step": 619500 }, { "epoch": 6.56, "learning_rate": 5.8368810393753684e-05, "loss": 0.2003, "step": 620000 }, { "epoch": 6.56, "eval_loss": 0.19198235869407654, "eval_runtime": 2.5115, "eval_samples_per_second": 914.603, "eval_steps_per_second": 14.334, "step": 620000 }, { "epoch": 6.56, "learning_rate": 5.8258761986213015e-05, "loss": 0.2004, "step": 620500 }, { "epoch": 6.57, "learning_rate": 5.814877301838688e-05, "loss": 0.2002, "step": 621000 }, { "epoch": 6.57, "eval_loss": 0.19093768298625946, "eval_runtime": 2.5116, "eval_samples_per_second": 914.543, "eval_steps_per_second": 14.333, "step": 621000 }, { "epoch": 6.57, "learning_rate": 5.803884379098094e-05, "loss": 0.2005, "step": 621500 }, { "epoch": 6.58, "learning_rate": 5.7928974604537494e-05, "loss": 0.2003, "step": 622000 }, { "epoch": 6.58, "eval_loss": 0.19122566282749176, "eval_runtime": 2.5785, "eval_samples_per_second": 890.823, "eval_steps_per_second": 13.962, "step": 622000 }, { "epoch": 6.59, "learning_rate": 5.781916575943469e-05, "loss": 0.2, "step": 622500 }, { "epoch": 6.59, "learning_rate": 5.770941755588573e-05, "loss": 0.2005, "step": 623000 }, { "epoch": 6.59, "eval_loss": 0.19022968411445618, "eval_runtime": 2.5145, "eval_samples_per_second": 913.487, "eval_steps_per_second": 14.317, "step": 623000 }, { "epoch": 6.6, "learning_rate": 5.7599730293938e-05, "loss": 0.2005, "step": 623500 }, { "epoch": 6.6, "learning_rate": 5.749010427347233e-05, "loss": 0.1998, "step": 624000 }, { "epoch": 6.6, "eval_loss": 0.18952861428260803, "eval_runtime": 2.6789, "eval_samples_per_second": 857.447, "eval_steps_per_second": 13.438, "step": 624000 }, { "epoch": 6.61, "learning_rate": 5.738053979420199e-05, "loss": 0.2004, "step": 624500 }, { "epoch": 6.61, "learning_rate": 5.7271037155672156e-05, "loss": 0.2, "step": 625000 }, { "epoch": 6.61, "eval_loss": 0.18912938237190247, "eval_runtime": 2.5074, "eval_samples_per_second": 916.101, "eval_steps_per_second": 14.358, "step": 625000 }, { "epoch": 6.62, "learning_rate": 5.716159665725883e-05, "loss": 0.1998, "step": 625500 }, { "epoch": 6.62, "learning_rate": 5.7052218598168154e-05, "loss": 0.1996, "step": 626000 }, { "epoch": 6.62, "eval_loss": 0.19050002098083496, "eval_runtime": 2.5363, "eval_samples_per_second": 905.665, "eval_steps_per_second": 14.194, "step": 626000 }, { "epoch": 6.63, "learning_rate": 5.69429032774356e-05, "loss": 0.1999, "step": 626500 }, { "epoch": 6.64, "learning_rate": 5.6833650993925016e-05, "loss": 0.1998, "step": 627000 }, { "epoch": 6.64, "eval_loss": 0.19091765582561493, "eval_runtime": 2.6002, "eval_samples_per_second": 883.405, "eval_steps_per_second": 13.845, "step": 627000 }, { "epoch": 6.64, "learning_rate": 5.6724462046328025e-05, "loss": 0.2, "step": 627500 }, { "epoch": 6.65, "learning_rate": 5.661533673316303e-05, "loss": 0.2, "step": 628000 }, { "epoch": 6.65, "eval_loss": 0.19069619476795197, "eval_runtime": 2.549, "eval_samples_per_second": 901.121, "eval_steps_per_second": 14.123, "step": 628000 }, { "epoch": 6.65, "learning_rate": 5.6506275352774447e-05, "loss": 0.1998, "step": 628500 }, { "epoch": 6.66, "learning_rate": 5.639727820333198e-05, "loss": 0.2001, "step": 629000 }, { "epoch": 6.66, "eval_loss": 0.19251221418380737, "eval_runtime": 2.5685, "eval_samples_per_second": 894.303, "eval_steps_per_second": 14.016, "step": 629000 }, { "epoch": 6.66, "learning_rate": 5.62883455828296e-05, "loss": 0.1996, "step": 629500 }, { "epoch": 6.67, "learning_rate": 5.617947778908498e-05, "loss": 0.2, "step": 630000 }, { "epoch": 6.67, "eval_loss": 0.19011762738227844, "eval_runtime": 2.6047, "eval_samples_per_second": 881.86, "eval_steps_per_second": 13.821, "step": 630000 }, { "epoch": 6.67, "learning_rate": 5.60706751197385e-05, "loss": 0.1996, "step": 630500 }, { "epoch": 6.68, "learning_rate": 5.596193787225254e-05, "loss": 0.1998, "step": 631000 }, { "epoch": 6.68, "eval_loss": 0.19056497514247894, "eval_runtime": 2.5606, "eval_samples_per_second": 897.047, "eval_steps_per_second": 14.059, "step": 631000 }, { "epoch": 6.69, "learning_rate": 5.585326634391049e-05, "loss": 0.1993, "step": 631500 }, { "epoch": 6.69, "learning_rate": 5.574466083181624e-05, "loss": 0.1993, "step": 632000 }, { "epoch": 6.69, "eval_loss": 0.18938562273979187, "eval_runtime": 2.5436, "eval_samples_per_second": 903.047, "eval_steps_per_second": 14.153, "step": 632000 }, { "epoch": 6.7, "learning_rate": 5.563612163289308e-05, "loss": 0.1995, "step": 632500 }, { "epoch": 6.7, "learning_rate": 5.552764904388305e-05, "loss": 0.1992, "step": 633000 }, { "epoch": 6.7, "eval_loss": 0.18923692405223846, "eval_runtime": 2.5198, "eval_samples_per_second": 911.583, "eval_steps_per_second": 14.287, "step": 633000 }, { "epoch": 6.71, "learning_rate": 5.541924336134609e-05, "loss": 0.1998, "step": 633500 }, { "epoch": 6.71, "learning_rate": 5.5310904881659116e-05, "loss": 0.1991, "step": 634000 }, { "epoch": 6.71, "eval_loss": 0.18939541280269623, "eval_runtime": 2.5614, "eval_samples_per_second": 896.775, "eval_steps_per_second": 14.055, "step": 634000 }, { "epoch": 6.72, "learning_rate": 5.5202633901015464e-05, "loss": 0.1989, "step": 634500 }, { "epoch": 6.72, "learning_rate": 5.5094430715423835e-05, "loss": 0.1989, "step": 635000 }, { "epoch": 6.72, "eval_loss": 0.18934237957000732, "eval_runtime": 2.4647, "eval_samples_per_second": 931.977, "eval_steps_per_second": 14.607, "step": 635000 }, { "epoch": 6.73, "learning_rate": 5.4986295620707626e-05, "loss": 0.1993, "step": 635500 }, { "epoch": 6.74, "learning_rate": 5.487822891250406e-05, "loss": 0.1991, "step": 636000 }, { "epoch": 6.74, "eval_loss": 0.18926899135112762, "eval_runtime": 2.5769, "eval_samples_per_second": 891.373, "eval_steps_per_second": 13.97, "step": 636000 }, { "epoch": 6.74, "learning_rate": 5.477023088626334e-05, "loss": 0.1994, "step": 636500 }, { "epoch": 6.75, "learning_rate": 5.4662301837247985e-05, "loss": 0.1988, "step": 637000 }, { "epoch": 6.75, "eval_loss": 0.18590334057807922, "eval_runtime": 2.6094, "eval_samples_per_second": 880.292, "eval_steps_per_second": 13.796, "step": 637000 }, { "epoch": 6.75, "learning_rate": 5.45544420605319e-05, "loss": 0.1991, "step": 637500 }, { "epoch": 6.76, "learning_rate": 5.4446651850999604e-05, "loss": 0.1987, "step": 638000 }, { "epoch": 6.76, "eval_loss": 0.1874515414237976, "eval_runtime": 2.6122, "eval_samples_per_second": 879.338, "eval_steps_per_second": 13.782, "step": 638000 }, { "epoch": 6.76, "learning_rate": 5.433893150334538e-05, "loss": 0.1991, "step": 638500 }, { "epoch": 6.77, "learning_rate": 5.4231281312072544e-05, "loss": 0.1989, "step": 639000 }, { "epoch": 6.77, "eval_loss": 0.1883026510477066, "eval_runtime": 2.5837, "eval_samples_per_second": 889.018, "eval_steps_per_second": 13.933, "step": 639000 }, { "epoch": 6.78, "learning_rate": 5.4123701571492636e-05, "loss": 0.1985, "step": 639500 }, { "epoch": 6.78, "learning_rate": 5.401619257572453e-05, "loss": 0.1987, "step": 640000 }, { "epoch": 6.78, "eval_loss": 0.18854811787605286, "eval_runtime": 2.5902, "eval_samples_per_second": 886.813, "eval_steps_per_second": 13.899, "step": 640000 }, { "epoch": 6.79, "learning_rate": 5.390875461869379e-05, "loss": 0.1994, "step": 640500 }, { "epoch": 6.79, "learning_rate": 5.3801387994131576e-05, "loss": 0.1988, "step": 641000 }, { "epoch": 6.79, "eval_loss": 0.1891511082649231, "eval_runtime": 2.5529, "eval_samples_per_second": 899.746, "eval_steps_per_second": 14.101, "step": 641000 }, { "epoch": 6.8, "learning_rate": 5.36940929955742e-05, "loss": 0.1984, "step": 641500 }, { "epoch": 6.8, "learning_rate": 5.358686991636209e-05, "loss": 0.1986, "step": 642000 }, { "epoch": 6.8, "eval_loss": 0.1878819614648819, "eval_runtime": 2.5682, "eval_samples_per_second": 894.392, "eval_steps_per_second": 14.017, "step": 642000 }, { "epoch": 6.81, "learning_rate": 5.347971904963904e-05, "loss": 0.1984, "step": 642500 }, { "epoch": 6.81, "learning_rate": 5.3372640688351476e-05, "loss": 0.1988, "step": 643000 }, { "epoch": 6.81, "eval_loss": 0.18957975506782532, "eval_runtime": 2.5593, "eval_samples_per_second": 897.526, "eval_steps_per_second": 14.067, "step": 643000 }, { "epoch": 6.82, "learning_rate": 5.326563512524748e-05, "loss": 0.1984, "step": 643500 }, { "epoch": 6.83, "learning_rate": 5.315870265287618e-05, "loss": 0.1985, "step": 644000 }, { "epoch": 6.83, "eval_loss": 0.18896985054016113, "eval_runtime": 2.5541, "eval_samples_per_second": 899.348, "eval_steps_per_second": 14.095, "step": 644000 }, { "epoch": 6.83, "learning_rate": 5.3051843563586914e-05, "loss": 0.1985, "step": 644500 }, { "epoch": 6.84, "learning_rate": 5.294505814952835e-05, "loss": 0.1981, "step": 645000 }, { "epoch": 6.84, "eval_loss": 0.18807749450206757, "eval_runtime": 2.6155, "eval_samples_per_second": 878.24, "eval_steps_per_second": 13.764, "step": 645000 }, { "epoch": 6.84, "learning_rate": 5.28383467026477e-05, "loss": 0.1983, "step": 645500 }, { "epoch": 6.85, "learning_rate": 5.2731709514689995e-05, "loss": 0.1982, "step": 646000 }, { "epoch": 6.85, "eval_loss": 0.18697316944599152, "eval_runtime": 2.5109, "eval_samples_per_second": 914.815, "eval_steps_per_second": 14.338, "step": 646000 }, { "epoch": 6.85, "learning_rate": 5.262514687719722e-05, "loss": 0.1983, "step": 646500 }, { "epoch": 6.86, "learning_rate": 5.25186590815076e-05, "loss": 0.1985, "step": 647000 }, { "epoch": 6.86, "eval_loss": 0.1897994428873062, "eval_runtime": 2.4914, "eval_samples_per_second": 921.982, "eval_steps_per_second": 14.45, "step": 647000 }, { "epoch": 6.86, "learning_rate": 5.24122464187547e-05, "loss": 0.1981, "step": 647500 }, { "epoch": 6.87, "learning_rate": 5.2305909179866635e-05, "loss": 0.1982, "step": 648000 }, { "epoch": 6.87, "eval_loss": 0.18750005960464478, "eval_runtime": 2.5048, "eval_samples_per_second": 917.025, "eval_steps_per_second": 14.372, "step": 648000 }, { "epoch": 6.88, "learning_rate": 5.219964765556536e-05, "loss": 0.1982, "step": 648500 }, { "epoch": 6.88, "learning_rate": 5.209346213636584e-05, "loss": 0.198, "step": 649000 }, { "epoch": 6.88, "eval_loss": 0.18683280050754547, "eval_runtime": 2.5207, "eval_samples_per_second": 911.266, "eval_steps_per_second": 14.282, "step": 649000 }, { "epoch": 6.89, "learning_rate": 5.1987352912575244e-05, "loss": 0.1982, "step": 649500 }, { "epoch": 6.89, "learning_rate": 5.188132027429215e-05, "loss": 0.198, "step": 650000 }, { "epoch": 6.89, "eval_loss": 0.18715369701385498, "eval_runtime": 2.4622, "eval_samples_per_second": 932.918, "eval_steps_per_second": 14.621, "step": 650000 }, { "epoch": 6.9, "learning_rate": 5.177536451140569e-05, "loss": 0.1982, "step": 650500 }, { "epoch": 6.9, "learning_rate": 5.166948591359489e-05, "loss": 0.1983, "step": 651000 }, { "epoch": 6.9, "eval_loss": 0.18645188212394714, "eval_runtime": 2.4913, "eval_samples_per_second": 922.024, "eval_steps_per_second": 14.451, "step": 651000 }, { "epoch": 6.91, "learning_rate": 5.1563684770327804e-05, "loss": 0.1983, "step": 651500 }, { "epoch": 6.91, "learning_rate": 5.145796137086076e-05, "loss": 0.1981, "step": 652000 }, { "epoch": 6.91, "eval_loss": 0.1894109547138214, "eval_runtime": 2.5469, "eval_samples_per_second": 901.873, "eval_steps_per_second": 14.135, "step": 652000 }, { "epoch": 6.92, "learning_rate": 5.135231600423742e-05, "loss": 0.1979, "step": 652500 }, { "epoch": 6.93, "learning_rate": 5.124674895928823e-05, "loss": 0.1976, "step": 653000 }, { "epoch": 6.93, "eval_loss": 0.18761011958122253, "eval_runtime": 2.6483, "eval_samples_per_second": 867.333, "eval_steps_per_second": 13.593, "step": 653000 }, { "epoch": 6.93, "learning_rate": 5.114126052462943e-05, "loss": 0.1978, "step": 653500 }, { "epoch": 6.94, "learning_rate": 5.103585098866237e-05, "loss": 0.1976, "step": 654000 }, { "epoch": 6.94, "eval_loss": 0.1896512508392334, "eval_runtime": 2.6053, "eval_samples_per_second": 881.651, "eval_steps_per_second": 13.818, "step": 654000 }, { "epoch": 6.94, "learning_rate": 5.093052063957276e-05, "loss": 0.1978, "step": 654500 }, { "epoch": 6.95, "learning_rate": 5.082526976532968e-05, "loss": 0.1982, "step": 655000 }, { "epoch": 6.95, "eval_loss": 0.18722392618656158, "eval_runtime": 2.5063, "eval_samples_per_second": 916.492, "eval_steps_per_second": 14.364, "step": 655000 }, { "epoch": 6.95, "learning_rate": 5.072009865368501e-05, "loss": 0.1977, "step": 655500 }, { "epoch": 6.96, "learning_rate": 5.061500759217261e-05, "loss": 0.1976, "step": 656000 }, { "epoch": 6.96, "eval_loss": 0.1882035732269287, "eval_runtime": 2.625, "eval_samples_per_second": 875.059, "eval_steps_per_second": 13.714, "step": 656000 }, { "epoch": 6.96, "learning_rate": 5.050999686810735e-05, "loss": 0.1977, "step": 656500 }, { "epoch": 6.97, "learning_rate": 5.04050667685846e-05, "loss": 0.1979, "step": 657000 }, { "epoch": 6.97, "eval_loss": 0.18653704226016998, "eval_runtime": 2.582, "eval_samples_per_second": 889.617, "eval_steps_per_second": 13.943, "step": 657000 }, { "epoch": 6.98, "learning_rate": 5.0300217580479244e-05, "loss": 0.1979, "step": 657500 }, { "epoch": 6.98, "learning_rate": 5.01954495904449e-05, "loss": 0.1973, "step": 658000 }, { "epoch": 6.98, "eval_loss": 0.1866321712732315, "eval_runtime": 2.5422, "eval_samples_per_second": 903.55, "eval_steps_per_second": 14.161, "step": 658000 }, { "epoch": 6.99, "learning_rate": 5.0090763084913336e-05, "loss": 0.1975, "step": 658500 }, { "epoch": 6.99, "learning_rate": 4.998615835009339e-05, "loss": 0.197, "step": 659000 }, { "epoch": 6.99, "eval_loss": 0.18548274040222168, "eval_runtime": 2.5558, "eval_samples_per_second": 898.724, "eval_steps_per_second": 14.085, "step": 659000 }, { "epoch": 7.0, "learning_rate": 4.988163567197043e-05, "loss": 0.1975, "step": 659500 }, { "epoch": 7.0, "learning_rate": 4.97771953363055e-05, "loss": 0.1977, "step": 660000 }, { "epoch": 7.0, "eval_loss": 0.1880265176296234, "eval_runtime": 2.6377, "eval_samples_per_second": 870.836, "eval_steps_per_second": 13.648, "step": 660000 }, { "epoch": 7.01, "learning_rate": 4.967283762863444e-05, "loss": 0.1974, "step": 660500 }, { "epoch": 7.01, "learning_rate": 4.956856283426728e-05, "loss": 0.1972, "step": 661000 }, { "epoch": 7.01, "eval_loss": 0.18715223670005798, "eval_runtime": 2.6628, "eval_samples_per_second": 862.627, "eval_steps_per_second": 13.52, "step": 661000 }, { "epoch": 7.02, "learning_rate": 4.946437123828732e-05, "loss": 0.1971, "step": 661500 }, { "epoch": 7.03, "learning_rate": 4.936026312555037e-05, "loss": 0.1967, "step": 662000 }, { "epoch": 7.03, "eval_loss": 0.18600988388061523, "eval_runtime": 2.614, "eval_samples_per_second": 878.714, "eval_steps_per_second": 13.772, "step": 662000 }, { "epoch": 7.03, "learning_rate": 4.925623878068408e-05, "loss": 0.1972, "step": 662500 }, { "epoch": 7.04, "learning_rate": 4.915229848808698e-05, "loss": 0.1973, "step": 663000 }, { "epoch": 7.04, "eval_loss": 0.1860048770904541, "eval_runtime": 2.673, "eval_samples_per_second": 859.331, "eval_steps_per_second": 13.468, "step": 663000 }, { "epoch": 7.04, "learning_rate": 4.904844253192795e-05, "loss": 0.1965, "step": 663500 }, { "epoch": 7.05, "learning_rate": 4.8944671196145136e-05, "loss": 0.1967, "step": 664000 }, { "epoch": 7.05, "eval_loss": 0.18750624358654022, "eval_runtime": 2.7099, "eval_samples_per_second": 847.62, "eval_steps_per_second": 13.284, "step": 664000 }, { "epoch": 7.05, "learning_rate": 4.884098476444539e-05, "loss": 0.1968, "step": 664500 }, { "epoch": 7.06, "learning_rate": 4.8737383520303546e-05, "loss": 0.1965, "step": 665000 }, { "epoch": 7.06, "eval_loss": 0.18656140565872192, "eval_runtime": 2.728, "eval_samples_per_second": 842.009, "eval_steps_per_second": 13.196, "step": 665000 }, { "epoch": 7.07, "learning_rate": 4.8633867746961356e-05, "loss": 0.1968, "step": 665500 }, { "epoch": 7.07, "learning_rate": 4.853043772742709e-05, "loss": 0.1969, "step": 666000 }, { "epoch": 7.07, "eval_loss": 0.18721851706504822, "eval_runtime": 2.6334, "eval_samples_per_second": 872.245, "eval_steps_per_second": 13.67, "step": 666000 }, { "epoch": 7.08, "learning_rate": 4.8427093744474364e-05, "loss": 0.1971, "step": 666500 }, { "epoch": 7.08, "learning_rate": 4.832383608064172e-05, "loss": 0.1968, "step": 667000 }, { "epoch": 7.08, "eval_loss": 0.18801318109035492, "eval_runtime": 2.7526, "eval_samples_per_second": 834.477, "eval_steps_per_second": 13.078, "step": 667000 }, { "epoch": 7.09, "learning_rate": 4.822066501823172e-05, "loss": 0.1967, "step": 667500 }, { "epoch": 7.09, "learning_rate": 4.811758083931005e-05, "loss": 0.1967, "step": 668000 }, { "epoch": 7.09, "eval_loss": 0.18405194580554962, "eval_runtime": 2.6183, "eval_samples_per_second": 877.299, "eval_steps_per_second": 13.75, "step": 668000 }, { "epoch": 7.1, "learning_rate": 4.8014583825704976e-05, "loss": 0.1965, "step": 668500 }, { "epoch": 7.1, "learning_rate": 4.791167425900632e-05, "loss": 0.1968, "step": 669000 }, { "epoch": 7.1, "eval_loss": 0.18537673354148865, "eval_runtime": 2.6514, "eval_samples_per_second": 866.335, "eval_steps_per_second": 13.578, "step": 669000 }, { "epoch": 7.11, "learning_rate": 4.780885242056493e-05, "loss": 0.1963, "step": 669500 }, { "epoch": 7.12, "learning_rate": 4.770611859149185e-05, "loss": 0.1963, "step": 670000 }, { "epoch": 7.12, "eval_loss": 0.187167689204216, "eval_runtime": 2.6705, "eval_samples_per_second": 860.127, "eval_steps_per_second": 13.48, "step": 670000 }, { "epoch": 7.12, "learning_rate": 4.7603473052657374e-05, "loss": 0.1966, "step": 670500 }, { "epoch": 7.13, "learning_rate": 4.7500916084690564e-05, "loss": 0.1967, "step": 671000 }, { "epoch": 7.13, "eval_loss": 0.186045840382576, "eval_runtime": 2.6321, "eval_samples_per_second": 872.688, "eval_steps_per_second": 13.677, "step": 671000 }, { "epoch": 7.13, "learning_rate": 4.7398447967978165e-05, "loss": 0.1963, "step": 671500 }, { "epoch": 7.14, "learning_rate": 4.729606898266411e-05, "loss": 0.1969, "step": 672000 }, { "epoch": 7.14, "eval_loss": 0.1882571130990982, "eval_runtime": 2.6187, "eval_samples_per_second": 877.14, "eval_steps_per_second": 13.747, "step": 672000 }, { "epoch": 7.14, "learning_rate": 4.71937794086487e-05, "loss": 0.1963, "step": 672500 }, { "epoch": 7.15, "learning_rate": 4.709157952558768e-05, "loss": 0.1963, "step": 673000 }, { "epoch": 7.15, "eval_loss": 0.18748030066490173, "eval_runtime": 2.6428, "eval_samples_per_second": 869.147, "eval_steps_per_second": 13.622, "step": 673000 }, { "epoch": 7.15, "learning_rate": 4.698946961289163e-05, "loss": 0.1962, "step": 673500 }, { "epoch": 7.16, "learning_rate": 4.688744994972514e-05, "loss": 0.1962, "step": 674000 }, { "epoch": 7.16, "eval_loss": 0.18707983195781708, "eval_runtime": 2.661, "eval_samples_per_second": 863.195, "eval_steps_per_second": 13.529, "step": 674000 }, { "epoch": 7.17, "learning_rate": 4.6785520815006085e-05, "loss": 0.1965, "step": 674500 }, { "epoch": 7.17, "learning_rate": 4.668368248740485e-05, "loss": 0.196, "step": 675000 }, { "epoch": 7.17, "eval_loss": 0.18474501371383667, "eval_runtime": 2.6847, "eval_samples_per_second": 855.597, "eval_steps_per_second": 13.409, "step": 675000 }, { "epoch": 7.18, "learning_rate": 4.658193524534351e-05, "loss": 0.1959, "step": 675500 }, { "epoch": 7.18, "learning_rate": 4.6480279366995116e-05, "loss": 0.1959, "step": 676000 }, { "epoch": 7.18, "eval_loss": 0.1878414750099182, "eval_runtime": 2.6912, "eval_samples_per_second": 853.536, "eval_steps_per_second": 13.377, "step": 676000 }, { "epoch": 7.19, "learning_rate": 4.637871513028303e-05, "loss": 0.196, "step": 676500 }, { "epoch": 7.19, "learning_rate": 4.6277242812879914e-05, "loss": 0.1959, "step": 677000 }, { "epoch": 7.19, "eval_loss": 0.18550430238246918, "eval_runtime": 2.7131, "eval_samples_per_second": 846.623, "eval_steps_per_second": 13.269, "step": 677000 }, { "epoch": 7.2, "learning_rate": 4.617586269220728e-05, "loss": 0.1959, "step": 677500 }, { "epoch": 7.2, "learning_rate": 4.607457504543447e-05, "loss": 0.1962, "step": 678000 }, { "epoch": 7.2, "eval_loss": 0.1865391880273819, "eval_runtime": 2.6562, "eval_samples_per_second": 864.771, "eval_steps_per_second": 13.553, "step": 678000 }, { "epoch": 7.21, "learning_rate": 4.597338014947801e-05, "loss": 0.1956, "step": 678500 }, { "epoch": 7.22, "learning_rate": 4.5872278281000955e-05, "loss": 0.1954, "step": 679000 }, { "epoch": 7.22, "eval_loss": 0.18739037215709686, "eval_runtime": 2.5862, "eval_samples_per_second": 888.174, "eval_steps_per_second": 13.92, "step": 679000 }, { "epoch": 7.22, "learning_rate": 4.577126971641189e-05, "loss": 0.1959, "step": 679500 }, { "epoch": 7.23, "learning_rate": 4.567035473186444e-05, "loss": 0.1955, "step": 680000 }, { "epoch": 7.23, "eval_loss": 0.18440315127372742, "eval_runtime": 2.6652, "eval_samples_per_second": 861.845, "eval_steps_per_second": 13.507, "step": 680000 }, { "epoch": 7.23, "learning_rate": 4.556953360325625e-05, "loss": 0.1953, "step": 680500 }, { "epoch": 7.24, "learning_rate": 4.546880660622845e-05, "loss": 0.1956, "step": 681000 }, { "epoch": 7.24, "eval_loss": 0.1863844096660614, "eval_runtime": 2.65, "eval_samples_per_second": 866.784, "eval_steps_per_second": 13.585, "step": 681000 }, { "epoch": 7.24, "learning_rate": 4.5368174016164844e-05, "loss": 0.1959, "step": 681500 }, { "epoch": 7.25, "learning_rate": 4.5267636108191036e-05, "loss": 0.1954, "step": 682000 }, { "epoch": 7.25, "eval_loss": 0.18577434122562408, "eval_runtime": 2.6836, "eval_samples_per_second": 855.93, "eval_steps_per_second": 13.415, "step": 682000 }, { "epoch": 7.25, "learning_rate": 4.5167193157173913e-05, "loss": 0.1954, "step": 682500 }, { "epoch": 7.26, "learning_rate": 4.5066845437720555e-05, "loss": 0.1956, "step": 683000 }, { "epoch": 7.26, "eval_loss": 0.1866350919008255, "eval_runtime": 2.6163, "eval_samples_per_second": 877.947, "eval_steps_per_second": 13.76, "step": 683000 }, { "epoch": 7.27, "learning_rate": 4.4966593224177866e-05, "loss": 0.1954, "step": 683500 }, { "epoch": 7.27, "learning_rate": 4.4866436790631564e-05, "loss": 0.1956, "step": 684000 }, { "epoch": 7.27, "eval_loss": 0.1872914433479309, "eval_runtime": 2.7089, "eval_samples_per_second": 847.935, "eval_steps_per_second": 13.289, "step": 684000 }, { "epoch": 7.28, "learning_rate": 4.476637641090551e-05, "loss": 0.1951, "step": 684500 }, { "epoch": 7.28, "learning_rate": 4.4666412358560955e-05, "loss": 0.195, "step": 685000 }, { "epoch": 7.28, "eval_loss": 0.1856185644865036, "eval_runtime": 2.6121, "eval_samples_per_second": 879.375, "eval_steps_per_second": 13.782, "step": 685000 }, { "epoch": 7.29, "learning_rate": 4.456654490689578e-05, "loss": 0.1952, "step": 685500 }, { "epoch": 7.29, "learning_rate": 4.4466774328943796e-05, "loss": 0.1953, "step": 686000 }, { "epoch": 7.29, "eval_loss": 0.18476100265979767, "eval_runtime": 2.6153, "eval_samples_per_second": 878.305, "eval_steps_per_second": 13.765, "step": 686000 }, { "epoch": 7.3, "learning_rate": 4.4367100897474e-05, "loss": 0.1955, "step": 686500 }, { "epoch": 7.3, "learning_rate": 4.426752488498972e-05, "loss": 0.1952, "step": 687000 }, { "epoch": 7.3, "eval_loss": 0.18481390178203583, "eval_runtime": 2.6488, "eval_samples_per_second": 867.183, "eval_steps_per_second": 13.591, "step": 687000 }, { "epoch": 7.31, "learning_rate": 4.4168046563727945e-05, "loss": 0.1953, "step": 687500 }, { "epoch": 7.32, "learning_rate": 4.406866620565862e-05, "loss": 0.1948, "step": 688000 }, { "epoch": 7.32, "eval_loss": 0.185079887509346, "eval_runtime": 2.6355, "eval_samples_per_second": 871.569, "eval_steps_per_second": 13.66, "step": 688000 }, { "epoch": 7.32, "learning_rate": 4.396938408248383e-05, "loss": 0.1952, "step": 688500 }, { "epoch": 7.33, "learning_rate": 4.3870200465637164e-05, "loss": 0.1951, "step": 689000 }, { "epoch": 7.33, "eval_loss": 0.1854194700717926, "eval_runtime": 2.6418, "eval_samples_per_second": 869.478, "eval_steps_per_second": 13.627, "step": 689000 }, { "epoch": 7.33, "learning_rate": 4.377111562628282e-05, "loss": 0.1948, "step": 689500 }, { "epoch": 7.34, "learning_rate": 4.3672129835314955e-05, "loss": 0.1951, "step": 690000 }, { "epoch": 7.34, "eval_loss": 0.18585672974586487, "eval_runtime": 2.7189, "eval_samples_per_second": 844.818, "eval_steps_per_second": 13.241, "step": 690000 }, { "epoch": 7.34, "learning_rate": 4.3573243363356916e-05, "loss": 0.1948, "step": 690500 }, { "epoch": 7.35, "learning_rate": 4.347445648076057e-05, "loss": 0.1953, "step": 691000 }, { "epoch": 7.35, "eval_loss": 0.18430623412132263, "eval_runtime": 2.6745, "eval_samples_per_second": 858.843, "eval_steps_per_second": 13.46, "step": 691000 }, { "epoch": 7.36, "learning_rate": 4.337576945760554e-05, "loss": 0.1949, "step": 691500 }, { "epoch": 7.36, "learning_rate": 4.327718256369826e-05, "loss": 0.1947, "step": 692000 }, { "epoch": 7.36, "eval_loss": 0.18453127145767212, "eval_runtime": 2.6094, "eval_samples_per_second": 880.283, "eval_steps_per_second": 13.796, "step": 692000 }, { "epoch": 7.37, "learning_rate": 4.317869606857162e-05, "loss": 0.1949, "step": 692500 }, { "epoch": 7.37, "learning_rate": 4.3080310241483885e-05, "loss": 0.1947, "step": 693000 }, { "epoch": 7.37, "eval_loss": 0.18398995697498322, "eval_runtime": 2.7028, "eval_samples_per_second": 849.851, "eval_steps_per_second": 13.319, "step": 693000 }, { "epoch": 7.38, "learning_rate": 4.298202535141818e-05, "loss": 0.1944, "step": 693500 }, { "epoch": 7.38, "learning_rate": 4.2883841667081675e-05, "loss": 0.1945, "step": 694000 }, { "epoch": 7.38, "eval_loss": 0.1815757304430008, "eval_runtime": 2.6223, "eval_samples_per_second": 875.945, "eval_steps_per_second": 13.728, "step": 694000 }, { "epoch": 7.39, "learning_rate": 4.2785759456904745e-05, "loss": 0.1943, "step": 694500 }, { "epoch": 7.39, "learning_rate": 4.268777898904044e-05, "loss": 0.1945, "step": 695000 }, { "epoch": 7.39, "eval_loss": 0.1843911111354828, "eval_runtime": 2.6472, "eval_samples_per_second": 867.72, "eval_steps_per_second": 13.599, "step": 695000 }, { "epoch": 7.4, "learning_rate": 4.2589900531363606e-05, "loss": 0.1947, "step": 695500 }, { "epoch": 7.41, "learning_rate": 4.2492124351470214e-05, "loss": 0.1946, "step": 696000 }, { "epoch": 7.41, "eval_loss": 0.18493103981018066, "eval_runtime": 2.6337, "eval_samples_per_second": 872.173, "eval_steps_per_second": 13.669, "step": 696000 }, { "epoch": 7.41, "learning_rate": 4.239445071667666e-05, "loss": 0.1946, "step": 696500 }, { "epoch": 7.42, "learning_rate": 4.2296879894018835e-05, "loss": 0.1944, "step": 697000 }, { "epoch": 7.42, "eval_loss": 0.18418000638484955, "eval_runtime": 2.5716, "eval_samples_per_second": 893.203, "eval_steps_per_second": 13.999, "step": 697000 }, { "epoch": 7.42, "learning_rate": 4.219941215025171e-05, "loss": 0.1943, "step": 697500 }, { "epoch": 7.43, "learning_rate": 4.210204775184834e-05, "loss": 0.1948, "step": 698000 }, { "epoch": 7.43, "eval_loss": 0.18401235342025757, "eval_runtime": 2.6433, "eval_samples_per_second": 868.996, "eval_steps_per_second": 13.619, "step": 698000 }, { "epoch": 7.43, "learning_rate": 4.2004786964999304e-05, "loss": 0.1944, "step": 698500 }, { "epoch": 7.44, "learning_rate": 4.190763005561186e-05, "loss": 0.1943, "step": 699000 }, { "epoch": 7.44, "eval_loss": 0.1846538931131363, "eval_runtime": 2.6279, "eval_samples_per_second": 874.077, "eval_steps_per_second": 13.699, "step": 699000 }, { "epoch": 7.44, "learning_rate": 4.1810577289309266e-05, "loss": 0.1941, "step": 699500 }, { "epoch": 7.45, "learning_rate": 4.171362893143013e-05, "loss": 0.1944, "step": 700000 }, { "epoch": 7.45, "eval_loss": 0.1839645951986313, "eval_runtime": 2.6536, "eval_samples_per_second": 865.616, "eval_steps_per_second": 13.566, "step": 700000 }, { "epoch": 7.46, "learning_rate": 4.1616785247027506e-05, "loss": 0.1942, "step": 700500 }, { "epoch": 7.46, "learning_rate": 4.1520046500868384e-05, "loss": 0.1941, "step": 701000 }, { "epoch": 7.46, "eval_loss": 0.18309521675109863, "eval_runtime": 2.6732, "eval_samples_per_second": 859.262, "eval_steps_per_second": 13.467, "step": 701000 }, { "epoch": 7.47, "learning_rate": 4.1423412957432775e-05, "loss": 0.1943, "step": 701500 }, { "epoch": 7.47, "learning_rate": 4.1326884880913074e-05, "loss": 0.1939, "step": 702000 }, { "epoch": 7.47, "eval_loss": 0.18348637223243713, "eval_runtime": 2.6604, "eval_samples_per_second": 863.415, "eval_steps_per_second": 13.532, "step": 702000 }, { "epoch": 7.48, "learning_rate": 4.123046253521341e-05, "loss": 0.1939, "step": 702500 }, { "epoch": 7.48, "learning_rate": 4.1134146183948724e-05, "loss": 0.1942, "step": 703000 }, { "epoch": 7.48, "eval_loss": 0.18514606356620789, "eval_runtime": 2.6612, "eval_samples_per_second": 863.158, "eval_steps_per_second": 13.528, "step": 703000 }, { "epoch": 7.49, "learning_rate": 4.1037936090444315e-05, "loss": 0.1945, "step": 703500 }, { "epoch": 7.49, "learning_rate": 4.0941832517734885e-05, "loss": 0.1938, "step": 704000 }, { "epoch": 7.49, "eval_loss": 0.18320082128047943, "eval_runtime": 2.5921, "eval_samples_per_second": 886.166, "eval_steps_per_second": 13.889, "step": 704000 }, { "epoch": 7.5, "learning_rate": 4.084583572856388e-05, "loss": 0.1937, "step": 704500 }, { "epoch": 7.51, "learning_rate": 4.0749945985382915e-05, "loss": 0.1938, "step": 705000 }, { "epoch": 7.51, "eval_loss": 0.181605264544487, "eval_runtime": 2.7114, "eval_samples_per_second": 847.159, "eval_steps_per_second": 13.277, "step": 705000 }, { "epoch": 7.51, "learning_rate": 4.065416355035087e-05, "loss": 0.1941, "step": 705500 }, { "epoch": 7.52, "learning_rate": 4.0558488685333235e-05, "loss": 0.1938, "step": 706000 }, { "epoch": 7.52, "eval_loss": 0.18272297084331512, "eval_runtime": 2.6623, "eval_samples_per_second": 862.792, "eval_steps_per_second": 13.522, "step": 706000 }, { "epoch": 7.52, "learning_rate": 4.04629216519015e-05, "loss": 0.1937, "step": 706500 }, { "epoch": 7.53, "learning_rate": 4.036746271133223e-05, "loss": 0.1936, "step": 707000 }, { "epoch": 7.53, "eval_loss": 0.1840648055076599, "eval_runtime": 2.6725, "eval_samples_per_second": 859.485, "eval_steps_per_second": 13.47, "step": 707000 }, { "epoch": 7.53, "learning_rate": 4.0272112124606546e-05, "loss": 0.1936, "step": 707500 }, { "epoch": 7.54, "learning_rate": 4.0176870152409324e-05, "loss": 0.1937, "step": 708000 }, { "epoch": 7.54, "eval_loss": 0.1836749017238617, "eval_runtime": 2.7112, "eval_samples_per_second": 847.228, "eval_steps_per_second": 13.278, "step": 708000 }, { "epoch": 7.54, "learning_rate": 4.008173705512842e-05, "loss": 0.1938, "step": 708500 }, { "epoch": 7.55, "learning_rate": 3.998671309285417e-05, "loss": 0.1937, "step": 709000 }, { "epoch": 7.55, "eval_loss": 0.18317854404449463, "eval_runtime": 2.6768, "eval_samples_per_second": 858.105, "eval_steps_per_second": 13.449, "step": 709000 }, { "epoch": 7.56, "learning_rate": 3.989179852537839e-05, "loss": 0.1937, "step": 709500 }, { "epoch": 7.56, "learning_rate": 3.979699361219395e-05, "loss": 0.1938, "step": 710000 }, { "epoch": 7.56, "eval_loss": 0.18444040417671204, "eval_runtime": 2.6261, "eval_samples_per_second": 874.67, "eval_steps_per_second": 13.708, "step": 710000 }, { "epoch": 7.57, "learning_rate": 3.9702298612493816e-05, "loss": 0.1938, "step": 710500 }, { "epoch": 7.57, "learning_rate": 3.960771378517049e-05, "loss": 0.1935, "step": 711000 }, { "epoch": 7.57, "eval_loss": 0.183274045586586, "eval_runtime": 2.6269, "eval_samples_per_second": 874.423, "eval_steps_per_second": 13.705, "step": 711000 }, { "epoch": 7.58, "learning_rate": 3.951323938881533e-05, "loss": 0.1936, "step": 711500 }, { "epoch": 7.58, "learning_rate": 3.941887568171766e-05, "loss": 0.1934, "step": 712000 }, { "epoch": 7.58, "eval_loss": 0.18676131963729858, "eval_runtime": 2.7142, "eval_samples_per_second": 846.278, "eval_steps_per_second": 13.263, "step": 712000 }, { "epoch": 7.59, "learning_rate": 3.9324622921864323e-05, "loss": 0.1933, "step": 712500 }, { "epoch": 7.59, "learning_rate": 3.923048136693873e-05, "loss": 0.1933, "step": 713000 }, { "epoch": 7.59, "eval_loss": 0.18421515822410583, "eval_runtime": 2.6188, "eval_samples_per_second": 877.135, "eval_steps_per_second": 13.747, "step": 713000 }, { "epoch": 7.6, "learning_rate": 3.913645127432028e-05, "loss": 0.193, "step": 713500 }, { "epoch": 7.61, "learning_rate": 3.904253290108369e-05, "loss": 0.193, "step": 714000 }, { "epoch": 7.61, "eval_loss": 0.18448708951473236, "eval_runtime": 2.6531, "eval_samples_per_second": 865.779, "eval_steps_per_second": 13.569, "step": 714000 }, { "epoch": 7.61, "learning_rate": 3.8948726503998176e-05, "loss": 0.1931, "step": 714500 }, { "epoch": 7.62, "learning_rate": 3.885503233952689e-05, "loss": 0.1937, "step": 715000 }, { "epoch": 7.62, "eval_loss": 0.18292050063610077, "eval_runtime": 2.5788, "eval_samples_per_second": 890.71, "eval_steps_per_second": 13.96, "step": 715000 }, { "epoch": 7.62, "learning_rate": 3.876145066382606e-05, "loss": 0.1931, "step": 715500 }, { "epoch": 7.63, "learning_rate": 3.86679817327444e-05, "loss": 0.1931, "step": 716000 }, { "epoch": 7.63, "eval_loss": 0.18449412286281586, "eval_runtime": 2.6103, "eval_samples_per_second": 879.974, "eval_steps_per_second": 13.792, "step": 716000 }, { "epoch": 7.63, "learning_rate": 3.857462580182245e-05, "loss": 0.1931, "step": 716500 }, { "epoch": 7.64, "learning_rate": 3.848138312629171e-05, "loss": 0.193, "step": 717000 }, { "epoch": 7.64, "eval_loss": 0.18389998376369476, "eval_runtime": 2.6596, "eval_samples_per_second": 863.655, "eval_steps_per_second": 13.536, "step": 717000 }, { "epoch": 7.65, "learning_rate": 3.838825396107415e-05, "loss": 0.1926, "step": 717500 }, { "epoch": 7.65, "learning_rate": 3.8295238560781317e-05, "loss": 0.1927, "step": 718000 }, { "epoch": 7.65, "eval_loss": 0.18192943930625916, "eval_runtime": 2.6184, "eval_samples_per_second": 877.259, "eval_steps_per_second": 13.749, "step": 718000 }, { "epoch": 7.66, "learning_rate": 3.820233717971374e-05, "loss": 0.193, "step": 718500 }, { "epoch": 7.66, "learning_rate": 3.810955007186029e-05, "loss": 0.1928, "step": 719000 }, { "epoch": 7.66, "eval_loss": 0.1814703792333603, "eval_runtime": 2.627, "eval_samples_per_second": 874.39, "eval_steps_per_second": 13.704, "step": 719000 }, { "epoch": 7.67, "learning_rate": 3.801687749089737e-05, "loss": 0.1928, "step": 719500 }, { "epoch": 7.67, "learning_rate": 3.792431969018824e-05, "loss": 0.1927, "step": 720000 }, { "epoch": 7.67, "eval_loss": 0.18235580623149872, "eval_runtime": 2.6689, "eval_samples_per_second": 860.641, "eval_steps_per_second": 13.488, "step": 720000 }, { "epoch": 7.68, "learning_rate": 3.783187692278245e-05, "loss": 0.1924, "step": 720500 }, { "epoch": 7.68, "learning_rate": 3.7739549441414945e-05, "loss": 0.1927, "step": 721000 }, { "epoch": 7.68, "eval_loss": 0.18421824276447296, "eval_runtime": 2.699, "eval_samples_per_second": 851.051, "eval_steps_per_second": 13.338, "step": 721000 }, { "epoch": 7.69, "learning_rate": 3.764733749850558e-05, "loss": 0.1928, "step": 721500 }, { "epoch": 7.7, "learning_rate": 3.755524134615825e-05, "loss": 0.1928, "step": 722000 }, { "epoch": 7.7, "eval_loss": 0.1823606938123703, "eval_runtime": 2.6367, "eval_samples_per_second": 871.176, "eval_steps_per_second": 13.654, "step": 722000 }, { "epoch": 7.7, "learning_rate": 3.746326123616032e-05, "loss": 0.1927, "step": 722500 }, { "epoch": 7.71, "learning_rate": 3.7371397419981925e-05, "loss": 0.1924, "step": 723000 }, { "epoch": 7.71, "eval_loss": 0.18170827627182007, "eval_runtime": 2.6026, "eval_samples_per_second": 882.563, "eval_steps_per_second": 13.832, "step": 723000 }, { "epoch": 7.71, "learning_rate": 3.7279650148775196e-05, "loss": 0.1927, "step": 723500 }, { "epoch": 7.72, "learning_rate": 3.7188019673373706e-05, "loss": 0.1923, "step": 724000 }, { "epoch": 7.72, "eval_loss": 0.18194718658924103, "eval_runtime": 2.729, "eval_samples_per_second": 841.686, "eval_steps_per_second": 13.191, "step": 724000 }, { "epoch": 7.72, "learning_rate": 3.709650624429166e-05, "loss": 0.1921, "step": 724500 }, { "epoch": 7.73, "learning_rate": 3.700511011172325e-05, "loss": 0.1924, "step": 725000 }, { "epoch": 7.73, "eval_loss": 0.18209348618984222, "eval_runtime": 2.6667, "eval_samples_per_second": 861.365, "eval_steps_per_second": 13.5, "step": 725000 }, { "epoch": 7.73, "learning_rate": 3.691383152554207e-05, "loss": 0.1926, "step": 725500 }, { "epoch": 7.74, "learning_rate": 3.682267073530023e-05, "loss": 0.1922, "step": 726000 }, { "epoch": 7.74, "eval_loss": 0.18260575830936432, "eval_runtime": 2.7378, "eval_samples_per_second": 838.986, "eval_steps_per_second": 13.149, "step": 726000 }, { "epoch": 7.75, "learning_rate": 3.67316279902279e-05, "loss": 0.1921, "step": 726500 }, { "epoch": 7.75, "learning_rate": 3.664070353923245e-05, "loss": 0.1923, "step": 727000 }, { "epoch": 7.75, "eval_loss": 0.18323123455047607, "eval_runtime": 2.6275, "eval_samples_per_second": 874.227, "eval_steps_per_second": 13.701, "step": 727000 }, { "epoch": 7.76, "learning_rate": 3.654989763089782e-05, "loss": 0.1921, "step": 727500 }, { "epoch": 7.76, "learning_rate": 3.645921051348396e-05, "loss": 0.1926, "step": 728000 }, { "epoch": 7.76, "eval_loss": 0.18396110832691193, "eval_runtime": 2.6742, "eval_samples_per_second": 858.943, "eval_steps_per_second": 13.462, "step": 728000 }, { "epoch": 7.77, "learning_rate": 3.6368642434925924e-05, "loss": 0.192, "step": 728500 }, { "epoch": 7.77, "learning_rate": 3.627819364283345e-05, "loss": 0.1921, "step": 729000 }, { "epoch": 7.77, "eval_loss": 0.18145744502544403, "eval_runtime": 2.6278, "eval_samples_per_second": 874.116, "eval_steps_per_second": 13.7, "step": 729000 }, { "epoch": 7.78, "learning_rate": 3.6187864384490035e-05, "loss": 0.1916, "step": 729500 }, { "epoch": 7.78, "learning_rate": 3.6097654906852405e-05, "loss": 0.1917, "step": 730000 }, { "epoch": 7.78, "eval_loss": 0.1821279525756836, "eval_runtime": 2.6606, "eval_samples_per_second": 863.347, "eval_steps_per_second": 13.531, "step": 730000 }, { "epoch": 7.79, "learning_rate": 3.600756545654988e-05, "loss": 0.1924, "step": 730500 }, { "epoch": 7.8, "learning_rate": 3.591759627988353e-05, "loss": 0.1921, "step": 731000 }, { "epoch": 7.8, "eval_loss": 0.1826609969139099, "eval_runtime": 2.5577, "eval_samples_per_second": 898.086, "eval_steps_per_second": 14.075, "step": 731000 }, { "epoch": 7.8, "learning_rate": 3.582774762282568e-05, "loss": 0.1926, "step": 731500 }, { "epoch": 7.81, "learning_rate": 3.573801973101913e-05, "loss": 0.1919, "step": 732000 }, { "epoch": 7.81, "eval_loss": 0.1833614856004715, "eval_runtime": 2.5691, "eval_samples_per_second": 894.085, "eval_steps_per_second": 14.013, "step": 732000 }, { "epoch": 7.81, "learning_rate": 3.564841284977646e-05, "loss": 0.1918, "step": 732500 }, { "epoch": 7.82, "learning_rate": 3.5558927224079534e-05, "loss": 0.1922, "step": 733000 }, { "epoch": 7.82, "eval_loss": 0.18190455436706543, "eval_runtime": 2.6712, "eval_samples_per_second": 859.911, "eval_steps_per_second": 13.477, "step": 733000 }, { "epoch": 7.82, "learning_rate": 3.546956309857859e-05, "loss": 0.1918, "step": 733500 }, { "epoch": 7.83, "learning_rate": 3.5380320717591716e-05, "loss": 0.1918, "step": 734000 }, { "epoch": 7.83, "eval_loss": 0.18287116289138794, "eval_runtime": 2.6384, "eval_samples_per_second": 870.604, "eval_steps_per_second": 13.645, "step": 734000 }, { "epoch": 7.83, "learning_rate": 3.5291200325104234e-05, "loss": 0.1916, "step": 734500 }, { "epoch": 7.84, "learning_rate": 3.5202202164767836e-05, "loss": 0.1916, "step": 735000 }, { "epoch": 7.84, "eval_loss": 0.18400417268276215, "eval_runtime": 2.6593, "eval_samples_per_second": 863.766, "eval_steps_per_second": 13.537, "step": 735000 }, { "epoch": 7.85, "learning_rate": 3.511332647990014e-05, "loss": 0.1918, "step": 735500 }, { "epoch": 7.85, "learning_rate": 3.5024573513483864e-05, "loss": 0.1918, "step": 736000 }, { "epoch": 7.85, "eval_loss": 0.17965777218341827, "eval_runtime": 2.6148, "eval_samples_per_second": 878.457, "eval_steps_per_second": 13.768, "step": 736000 }, { "epoch": 7.86, "learning_rate": 3.493594350816619e-05, "loss": 0.1916, "step": 736500 }, { "epoch": 7.86, "learning_rate": 3.484743670625822e-05, "loss": 0.1913, "step": 737000 }, { "epoch": 7.86, "eval_loss": 0.18245890736579895, "eval_runtime": 2.6872, "eval_samples_per_second": 854.791, "eval_steps_per_second": 13.397, "step": 737000 }, { "epoch": 7.87, "learning_rate": 3.4759053349734126e-05, "loss": 0.1916, "step": 737500 }, { "epoch": 7.87, "learning_rate": 3.467079368023068e-05, "loss": 0.1913, "step": 738000 }, { "epoch": 7.87, "eval_loss": 0.18023112416267395, "eval_runtime": 2.6258, "eval_samples_per_second": 874.787, "eval_steps_per_second": 13.71, "step": 738000 }, { "epoch": 7.88, "learning_rate": 3.458265793904642e-05, "loss": 0.1912, "step": 738500 }, { "epoch": 7.88, "learning_rate": 3.449464636714107e-05, "loss": 0.1911, "step": 739000 }, { "epoch": 7.88, "eval_loss": 0.18116475641727448, "eval_runtime": 2.5092, "eval_samples_per_second": 915.413, "eval_steps_per_second": 14.347, "step": 739000 }, { "epoch": 7.89, "learning_rate": 3.4406759205134966e-05, "loss": 0.191, "step": 739500 }, { "epoch": 7.9, "learning_rate": 3.431899669330819e-05, "loss": 0.1914, "step": 740000 }, { "epoch": 7.9, "eval_loss": 0.18056176602840424, "eval_runtime": 2.7172, "eval_samples_per_second": 845.342, "eval_steps_per_second": 13.249, "step": 740000 }, { "epoch": 7.9, "learning_rate": 3.4231359071600156e-05, "loss": 0.1915, "step": 740500 }, { "epoch": 7.91, "learning_rate": 3.4143846579608744e-05, "loss": 0.1915, "step": 741000 }, { "epoch": 7.91, "eval_loss": 0.18201898038387299, "eval_runtime": 2.6375, "eval_samples_per_second": 870.9, "eval_steps_per_second": 13.649, "step": 741000 }, { "epoch": 7.91, "learning_rate": 3.405645945658976e-05, "loss": 0.191, "step": 741500 }, { "epoch": 7.92, "learning_rate": 3.396919794145629e-05, "loss": 0.1915, "step": 742000 }, { "epoch": 7.92, "eval_loss": 0.1818230152130127, "eval_runtime": 2.6616, "eval_samples_per_second": 863.023, "eval_steps_per_second": 13.526, "step": 742000 }, { "epoch": 7.92, "learning_rate": 3.3882062272777936e-05, "loss": 0.191, "step": 742500 }, { "epoch": 7.93, "learning_rate": 3.3795052688780345e-05, "loss": 0.191, "step": 743000 }, { "epoch": 7.93, "eval_loss": 0.18135210871696472, "eval_runtime": 2.7035, "eval_samples_per_second": 849.632, "eval_steps_per_second": 13.316, "step": 743000 }, { "epoch": 7.94, "learning_rate": 3.370816942734438e-05, "loss": 0.1912, "step": 743500 }, { "epoch": 7.94, "learning_rate": 3.362141272600552e-05, "loss": 0.1907, "step": 744000 }, { "epoch": 7.94, "eval_loss": 0.18246738612651825, "eval_runtime": 2.6098, "eval_samples_per_second": 880.139, "eval_steps_per_second": 13.794, "step": 744000 }, { "epoch": 7.95, "learning_rate": 3.3534782821953325e-05, "loss": 0.1907, "step": 744500 }, { "epoch": 7.95, "learning_rate": 3.3448279952030615e-05, "loss": 0.191, "step": 745000 }, { "epoch": 7.95, "eval_loss": 0.1824055016040802, "eval_runtime": 2.604, "eval_samples_per_second": 882.092, "eval_steps_per_second": 13.825, "step": 745000 }, { "epoch": 7.96, "learning_rate": 3.336190435273295e-05, "loss": 0.1911, "step": 745500 }, { "epoch": 7.96, "learning_rate": 3.327565626020793e-05, "loss": 0.1909, "step": 746000 }, { "epoch": 7.96, "eval_loss": 0.18200530111789703, "eval_runtime": 2.6615, "eval_samples_per_second": 863.04, "eval_steps_per_second": 13.526, "step": 746000 }, { "epoch": 7.97, "learning_rate": 3.31895359102545e-05, "loss": 0.191, "step": 746500 }, { "epoch": 7.97, "learning_rate": 3.3103543538322455e-05, "loss": 0.1908, "step": 747000 }, { "epoch": 7.97, "eval_loss": 0.17920434474945068, "eval_runtime": 2.6801, "eval_samples_per_second": 857.049, "eval_steps_per_second": 13.432, "step": 747000 }, { "epoch": 7.98, "learning_rate": 3.3017679379511645e-05, "loss": 0.191, "step": 747500 }, { "epoch": 7.99, "learning_rate": 3.293194366857137e-05, "loss": 0.1905, "step": 748000 }, { "epoch": 7.99, "eval_loss": 0.18376828730106354, "eval_runtime": 2.6586, "eval_samples_per_second": 863.997, "eval_steps_per_second": 13.541, "step": 748000 }, { "epoch": 7.99, "learning_rate": 3.2846336639899845e-05, "loss": 0.1904, "step": 748500 }, { "epoch": 8.0, "learning_rate": 3.276085852754336e-05, "loss": 0.1907, "step": 749000 }, { "epoch": 8.0, "eval_loss": 0.18018439412117004, "eval_runtime": 2.6652, "eval_samples_per_second": 861.854, "eval_steps_per_second": 13.508, "step": 749000 }, { "epoch": 8.0, "learning_rate": 3.267550956519586e-05, "loss": 0.1904, "step": 749500 }, { "epoch": 8.01, "learning_rate": 3.259028998619814e-05, "loss": 0.1904, "step": 750000 }, { "epoch": 8.01, "eval_loss": 0.1812291294336319, "eval_runtime": 2.6153, "eval_samples_per_second": 878.279, "eval_steps_per_second": 13.765, "step": 750000 }, { "epoch": 8.01, "learning_rate": 3.2505200023537225e-05, "loss": 0.1906, "step": 750500 }, { "epoch": 8.02, "learning_rate": 3.2420239909845894e-05, "loss": 0.1903, "step": 751000 }, { "epoch": 8.02, "eval_loss": 0.18331938982009888, "eval_runtime": 2.6413, "eval_samples_per_second": 869.633, "eval_steps_per_second": 13.629, "step": 751000 }, { "epoch": 8.02, "learning_rate": 3.233540987740179e-05, "loss": 0.1904, "step": 751500 }, { "epoch": 8.03, "learning_rate": 3.2250710158127045e-05, "loss": 0.1903, "step": 752000 }, { "epoch": 8.03, "eval_loss": 0.18277068436145782, "eval_runtime": 2.5885, "eval_samples_per_second": 887.393, "eval_steps_per_second": 13.908, "step": 752000 }, { "epoch": 8.04, "learning_rate": 3.216614098358741e-05, "loss": 0.1902, "step": 752500 }, { "epoch": 8.04, "learning_rate": 3.2081702584991786e-05, "loss": 0.1905, "step": 753000 }, { "epoch": 8.04, "eval_loss": 0.18270383775234222, "eval_runtime": 2.6624, "eval_samples_per_second": 862.74, "eval_steps_per_second": 13.521, "step": 753000 }, { "epoch": 8.05, "learning_rate": 3.1997395193191565e-05, "loss": 0.1901, "step": 753500 }, { "epoch": 8.05, "learning_rate": 3.191321903867988e-05, "loss": 0.1903, "step": 754000 }, { "epoch": 8.05, "eval_loss": 0.1816052347421646, "eval_runtime": 2.6428, "eval_samples_per_second": 869.149, "eval_steps_per_second": 13.622, "step": 754000 }, { "epoch": 8.06, "learning_rate": 3.18291743515912e-05, "loss": 0.1903, "step": 754500 }, { "epoch": 8.06, "learning_rate": 3.174526136170039e-05, "loss": 0.1905, "step": 755000 }, { "epoch": 8.06, "eval_loss": 0.1785338819026947, "eval_runtime": 2.6371, "eval_samples_per_second": 871.041, "eval_steps_per_second": 13.651, "step": 755000 }, { "epoch": 8.07, "learning_rate": 3.1661480298422433e-05, "loss": 0.1905, "step": 755500 }, { "epoch": 8.07, "learning_rate": 3.157783139081155e-05, "loss": 0.19, "step": 756000 }, { "epoch": 8.07, "eval_loss": 0.18119803071022034, "eval_runtime": 2.5992, "eval_samples_per_second": 883.749, "eval_steps_per_second": 13.851, "step": 756000 }, { "epoch": 8.08, "learning_rate": 3.149431486756063e-05, "loss": 0.19, "step": 756500 }, { "epoch": 8.09, "learning_rate": 3.141093095700072e-05, "loss": 0.1899, "step": 757000 }, { "epoch": 8.09, "eval_loss": 0.18124920129776, "eval_runtime": 2.6586, "eval_samples_per_second": 863.984, "eval_steps_per_second": 13.541, "step": 757000 }, { "epoch": 8.09, "learning_rate": 3.132767988710016e-05, "loss": 0.1898, "step": 757500 }, { "epoch": 8.1, "learning_rate": 3.1244561885464244e-05, "loss": 0.1902, "step": 758000 }, { "epoch": 8.1, "eval_loss": 0.1819985955953598, "eval_runtime": 2.6588, "eval_samples_per_second": 863.933, "eval_steps_per_second": 13.54, "step": 758000 }, { "epoch": 8.1, "learning_rate": 3.116157717933443e-05, "loss": 0.1902, "step": 758500 }, { "epoch": 8.11, "learning_rate": 3.107872599558769e-05, "loss": 0.19, "step": 759000 }, { "epoch": 8.11, "eval_loss": 0.18176889419555664, "eval_runtime": 2.6674, "eval_samples_per_second": 861.13, "eval_steps_per_second": 13.496, "step": 759000 }, { "epoch": 8.11, "learning_rate": 3.0996008560736083e-05, "loss": 0.1901, "step": 759500 }, { "epoch": 8.12, "learning_rate": 3.0913425100925795e-05, "loss": 0.19, "step": 760000 }, { "epoch": 8.12, "eval_loss": 0.17839893698692322, "eval_runtime": 2.7183, "eval_samples_per_second": 845.029, "eval_steps_per_second": 13.244, "step": 760000 }, { "epoch": 8.12, "learning_rate": 3.083097584193693e-05, "loss": 0.1896, "step": 760500 }, { "epoch": 8.13, "learning_rate": 3.0748661009182616e-05, "loss": 0.1898, "step": 761000 }, { "epoch": 8.13, "eval_loss": 0.17937108874320984, "eval_runtime": 2.6571, "eval_samples_per_second": 864.488, "eval_steps_per_second": 13.549, "step": 761000 }, { "epoch": 8.14, "learning_rate": 3.066648082770845e-05, "loss": 0.19, "step": 761500 }, { "epoch": 8.14, "learning_rate": 3.0584435522191896e-05, "loss": 0.1893, "step": 762000 }, { "epoch": 8.14, "eval_loss": 0.18026237189769745, "eval_runtime": 2.6859, "eval_samples_per_second": 855.204, "eval_steps_per_second": 13.403, "step": 762000 }, { "epoch": 8.15, "learning_rate": 3.0502525316941673e-05, "loss": 0.1896, "step": 762500 }, { "epoch": 8.15, "learning_rate": 3.0420750435897183e-05, "loss": 0.1895, "step": 763000 }, { "epoch": 8.15, "eval_loss": 0.18032604455947876, "eval_runtime": 2.6799, "eval_samples_per_second": 857.123, "eval_steps_per_second": 13.433, "step": 763000 }, { "epoch": 8.16, "learning_rate": 3.0339111102627846e-05, "loss": 0.1897, "step": 763500 }, { "epoch": 8.16, "learning_rate": 3.025760754033246e-05, "loss": 0.1892, "step": 764000 }, { "epoch": 8.16, "eval_loss": 0.18092449009418488, "eval_runtime": 2.6398, "eval_samples_per_second": 870.157, "eval_steps_per_second": 13.638, "step": 764000 }, { "epoch": 8.17, "learning_rate": 3.017623997183864e-05, "loss": 0.1897, "step": 764500 }, { "epoch": 8.17, "learning_rate": 3.0095008619602206e-05, "loss": 0.1894, "step": 765000 }, { "epoch": 8.17, "eval_loss": 0.17851532995700836, "eval_runtime": 2.6678, "eval_samples_per_second": 861.0, "eval_steps_per_second": 13.494, "step": 765000 }, { "epoch": 8.18, "learning_rate": 3.0013913705706587e-05, "loss": 0.1892, "step": 765500 }, { "epoch": 8.19, "learning_rate": 2.993295545186223e-05, "loss": 0.19, "step": 766000 }, { "epoch": 8.19, "eval_loss": 0.18047097325325012, "eval_runtime": 2.6997, "eval_samples_per_second": 850.838, "eval_steps_per_second": 13.335, "step": 766000 }, { "epoch": 8.19, "learning_rate": 2.9852134079405817e-05, "loss": 0.1897, "step": 766500 }, { "epoch": 8.2, "learning_rate": 2.977144980929996e-05, "loss": 0.1895, "step": 767000 }, { "epoch": 8.2, "eval_loss": 0.1800650954246521, "eval_runtime": 2.6124, "eval_samples_per_second": 879.254, "eval_steps_per_second": 13.78, "step": 767000 }, { "epoch": 8.2, "learning_rate": 2.969090286213233e-05, "loss": 0.1896, "step": 767500 }, { "epoch": 8.21, "learning_rate": 2.961049345811523e-05, "loss": 0.1891, "step": 768000 }, { "epoch": 8.21, "eval_loss": 0.18048371374607086, "eval_runtime": 2.7369, "eval_samples_per_second": 839.265, "eval_steps_per_second": 13.153, "step": 768000 }, { "epoch": 8.21, "learning_rate": 2.9530221817084937e-05, "loss": 0.1894, "step": 768500 }, { "epoch": 8.22, "learning_rate": 2.945008815850097e-05, "loss": 0.1895, "step": 769000 }, { "epoch": 8.22, "eval_loss": 0.17877112329006195, "eval_runtime": 2.6697, "eval_samples_per_second": 860.384, "eval_steps_per_second": 13.484, "step": 769000 }, { "epoch": 8.22, "learning_rate": 2.9370092701445748e-05, "loss": 0.1891, "step": 769500 }, { "epoch": 8.23, "learning_rate": 2.929023566462377e-05, "loss": 0.1894, "step": 770000 }, { "epoch": 8.23, "eval_loss": 0.1808948516845703, "eval_runtime": 2.6258, "eval_samples_per_second": 874.77, "eval_steps_per_second": 13.71, "step": 770000 }, { "epoch": 8.24, "learning_rate": 2.921051726636114e-05, "loss": 0.1892, "step": 770500 }, { "epoch": 8.24, "learning_rate": 2.9130937724604947e-05, "loss": 0.1895, "step": 771000 }, { "epoch": 8.24, "eval_loss": 0.17894434928894043, "eval_runtime": 2.6506, "eval_samples_per_second": 866.601, "eval_steps_per_second": 13.582, "step": 771000 }, { "epoch": 8.25, "learning_rate": 2.9051497256922545e-05, "loss": 0.1891, "step": 771500 }, { "epoch": 8.25, "learning_rate": 2.8972196080501208e-05, "loss": 0.1888, "step": 772000 }, { "epoch": 8.25, "eval_loss": 0.17959585785865784, "eval_runtime": 2.688, "eval_samples_per_second": 854.55, "eval_steps_per_second": 13.393, "step": 772000 }, { "epoch": 8.26, "learning_rate": 2.8893034412147268e-05, "loss": 0.189, "step": 772500 }, { "epoch": 8.26, "learning_rate": 2.8814012468285748e-05, "loss": 0.189, "step": 773000 }, { "epoch": 8.26, "eval_loss": 0.1775985211133957, "eval_runtime": 2.6568, "eval_samples_per_second": 864.565, "eval_steps_per_second": 13.55, "step": 773000 }, { "epoch": 8.27, "learning_rate": 2.8735130464959604e-05, "loss": 0.1888, "step": 773500 }, { "epoch": 8.28, "learning_rate": 2.865638861782922e-05, "loss": 0.1889, "step": 774000 }, { "epoch": 8.28, "eval_loss": 0.18201559782028198, "eval_runtime": 2.6162, "eval_samples_per_second": 877.995, "eval_steps_per_second": 13.76, "step": 774000 }, { "epoch": 8.28, "learning_rate": 2.8577787142171804e-05, "loss": 0.1888, "step": 774500 }, { "epoch": 8.29, "learning_rate": 2.849932625288079e-05, "loss": 0.1895, "step": 775000 }, { "epoch": 8.29, "eval_loss": 0.18033553659915924, "eval_runtime": 2.6434, "eval_samples_per_second": 868.969, "eval_steps_per_second": 13.619, "step": 775000 }, { "epoch": 8.29, "learning_rate": 2.8421006164465254e-05, "loss": 0.1891, "step": 775500 }, { "epoch": 8.3, "learning_rate": 2.8342827091049336e-05, "loss": 0.1888, "step": 776000 }, { "epoch": 8.3, "eval_loss": 0.17612183094024658, "eval_runtime": 2.6473, "eval_samples_per_second": 867.687, "eval_steps_per_second": 13.599, "step": 776000 }, { "epoch": 8.3, "learning_rate": 2.8264789246371605e-05, "loss": 0.189, "step": 776500 }, { "epoch": 8.31, "learning_rate": 2.8186892843784587e-05, "loss": 0.1889, "step": 777000 }, { "epoch": 8.31, "eval_loss": 0.17975667119026184, "eval_runtime": 2.6953, "eval_samples_per_second": 852.233, "eval_steps_per_second": 13.357, "step": 777000 }, { "epoch": 8.31, "learning_rate": 2.810913809625404e-05, "loss": 0.1884, "step": 777500 }, { "epoch": 8.32, "learning_rate": 2.803152521635851e-05, "loss": 0.1885, "step": 778000 }, { "epoch": 8.32, "eval_loss": 0.17638467252254486, "eval_runtime": 2.5904, "eval_samples_per_second": 886.746, "eval_steps_per_second": 13.898, "step": 778000 }, { "epoch": 8.33, "learning_rate": 2.795405441628862e-05, "loss": 0.1883, "step": 778500 }, { "epoch": 8.33, "learning_rate": 2.7876725907846578e-05, "loss": 0.1891, "step": 779000 }, { "epoch": 8.33, "eval_loss": 0.18025876581668854, "eval_runtime": 2.6761, "eval_samples_per_second": 858.354, "eval_steps_per_second": 13.453, "step": 779000 }, { "epoch": 8.34, "learning_rate": 2.7799539902445596e-05, "loss": 0.1883, "step": 779500 }, { "epoch": 8.34, "learning_rate": 2.7722496611109243e-05, "loss": 0.1888, "step": 780000 }, { "epoch": 8.34, "eval_loss": 0.1802951842546463, "eval_runtime": 2.7322, "eval_samples_per_second": 840.721, "eval_steps_per_second": 13.176, "step": 780000 }, { "epoch": 8.35, "learning_rate": 2.7645596244470935e-05, "loss": 0.1885, "step": 780500 }, { "epoch": 8.35, "learning_rate": 2.7568839012773365e-05, "loss": 0.1884, "step": 781000 }, { "epoch": 8.35, "eval_loss": 0.1805545538663864, "eval_runtime": 2.6452, "eval_samples_per_second": 868.354, "eval_steps_per_second": 13.609, "step": 781000 }, { "epoch": 8.36, "learning_rate": 2.7492225125867825e-05, "loss": 0.1889, "step": 781500 }, { "epoch": 8.36, "learning_rate": 2.7415754793213826e-05, "loss": 0.1886, "step": 782000 }, { "epoch": 8.36, "eval_loss": 0.1767302304506302, "eval_runtime": 2.6579, "eval_samples_per_second": 864.226, "eval_steps_per_second": 13.545, "step": 782000 }, { "epoch": 8.37, "learning_rate": 2.7339428223878283e-05, "loss": 0.1883, "step": 782500 }, { "epoch": 8.38, "learning_rate": 2.7263245626535116e-05, "loss": 0.1884, "step": 783000 }, { "epoch": 8.38, "eval_loss": 0.1802656203508377, "eval_runtime": 2.7131, "eval_samples_per_second": 846.621, "eval_steps_per_second": 13.269, "step": 783000 }, { "epoch": 8.38, "learning_rate": 2.7187207209464687e-05, "loss": 0.1883, "step": 783500 }, { "epoch": 8.39, "learning_rate": 2.7111313180553077e-05, "loss": 0.1882, "step": 784000 }, { "epoch": 8.39, "eval_loss": 0.17976997792720795, "eval_runtime": 2.668, "eval_samples_per_second": 860.932, "eval_steps_per_second": 13.493, "step": 784000 }, { "epoch": 8.39, "learning_rate": 2.703556374729169e-05, "loss": 0.1885, "step": 784500 }, { "epoch": 8.4, "learning_rate": 2.6959959116776587e-05, "loss": 0.188, "step": 785000 }, { "epoch": 8.4, "eval_loss": 0.1783231794834137, "eval_runtime": 2.6459, "eval_samples_per_second": 868.123, "eval_steps_per_second": 13.606, "step": 785000 }, { "epoch": 8.4, "learning_rate": 2.68844994957079e-05, "loss": 0.1881, "step": 785500 }, { "epoch": 8.41, "learning_rate": 2.6809185090389406e-05, "loss": 0.1884, "step": 786000 }, { "epoch": 8.41, "eval_loss": 0.18017184734344482, "eval_runtime": 2.6671, "eval_samples_per_second": 861.233, "eval_steps_per_second": 13.498, "step": 786000 }, { "epoch": 8.41, "learning_rate": 2.6734016106727777e-05, "loss": 0.1881, "step": 786500 }, { "epoch": 8.42, "learning_rate": 2.6658992750232167e-05, "loss": 0.188, "step": 787000 }, { "epoch": 8.42, "eval_loss": 0.17710144817829132, "eval_runtime": 2.6667, "eval_samples_per_second": 861.367, "eval_steps_per_second": 13.5, "step": 787000 }, { "epoch": 8.43, "learning_rate": 2.6584115226013553e-05, "loss": 0.1883, "step": 787500 }, { "epoch": 8.43, "learning_rate": 2.6509383738784218e-05, "loss": 0.188, "step": 788000 }, { "epoch": 8.43, "eval_loss": 0.1786525398492813, "eval_runtime": 2.6579, "eval_samples_per_second": 864.232, "eval_steps_per_second": 13.545, "step": 788000 }, { "epoch": 8.44, "learning_rate": 2.6434798492857228e-05, "loss": 0.1881, "step": 788500 }, { "epoch": 8.44, "learning_rate": 2.6360359692145757e-05, "loss": 0.1882, "step": 789000 }, { "epoch": 8.44, "eval_loss": 0.17897970974445343, "eval_runtime": 2.6253, "eval_samples_per_second": 874.933, "eval_steps_per_second": 13.712, "step": 789000 }, { "epoch": 8.45, "learning_rate": 2.6286067540162677e-05, "loss": 0.1882, "step": 789500 }, { "epoch": 8.45, "learning_rate": 2.6211922240019883e-05, "loss": 0.1883, "step": 790000 }, { "epoch": 8.45, "eval_loss": 0.17872017621994019, "eval_runtime": 2.5868, "eval_samples_per_second": 887.972, "eval_steps_per_second": 13.917, "step": 790000 }, { "epoch": 8.46, "learning_rate": 2.6137923994427768e-05, "loss": 0.1881, "step": 790500 }, { "epoch": 8.46, "learning_rate": 2.6064073005694758e-05, "loss": 0.1876, "step": 791000 }, { "epoch": 8.46, "eval_loss": 0.17775288224220276, "eval_runtime": 2.6373, "eval_samples_per_second": 870.95, "eval_steps_per_second": 13.65, "step": 791000 }, { "epoch": 8.47, "learning_rate": 2.5990369475726598e-05, "loss": 0.1878, "step": 791500 }, { "epoch": 8.48, "learning_rate": 2.591681360602595e-05, "loss": 0.1876, "step": 792000 }, { "epoch": 8.48, "eval_loss": 0.1769527643918991, "eval_runtime": 2.7236, "eval_samples_per_second": 843.383, "eval_steps_per_second": 13.218, "step": 792000 }, { "epoch": 8.48, "learning_rate": 2.5843405597691748e-05, "loss": 0.188, "step": 792500 }, { "epoch": 8.49, "learning_rate": 2.577014565141866e-05, "loss": 0.1872, "step": 793000 }, { "epoch": 8.49, "eval_loss": 0.1795121282339096, "eval_runtime": 2.6103, "eval_samples_per_second": 879.963, "eval_steps_per_second": 13.791, "step": 793000 }, { "epoch": 8.49, "learning_rate": 2.569703396749661e-05, "loss": 0.1875, "step": 793500 }, { "epoch": 8.5, "learning_rate": 2.562407074581014e-05, "loss": 0.188, "step": 794000 }, { "epoch": 8.5, "eval_loss": 0.17704832553863525, "eval_runtime": 2.6547, "eval_samples_per_second": 865.258, "eval_steps_per_second": 13.561, "step": 794000 }, { "epoch": 8.5, "learning_rate": 2.5551256185837897e-05, "loss": 0.1878, "step": 794500 }, { "epoch": 8.51, "learning_rate": 2.5478590486652137e-05, "loss": 0.1879, "step": 795000 }, { "epoch": 8.51, "eval_loss": 0.1775512397289276, "eval_runtime": 2.704, "eval_samples_per_second": 849.487, "eval_steps_per_second": 13.314, "step": 795000 }, { "epoch": 8.51, "learning_rate": 2.5406073846918076e-05, "loss": 0.1873, "step": 795500 }, { "epoch": 8.52, "learning_rate": 2.533370646489347e-05, "loss": 0.1872, "step": 796000 }, { "epoch": 8.52, "eval_loss": 0.17661112546920776, "eval_runtime": 2.6375, "eval_samples_per_second": 870.909, "eval_steps_per_second": 13.649, "step": 796000 }, { "epoch": 8.53, "learning_rate": 2.526148853842796e-05, "loss": 0.1874, "step": 796500 }, { "epoch": 8.53, "learning_rate": 2.5189420264962586e-05, "loss": 0.1875, "step": 797000 }, { "epoch": 8.53, "eval_loss": 0.17768479883670807, "eval_runtime": 2.6644, "eval_samples_per_second": 862.092, "eval_steps_per_second": 13.511, "step": 797000 }, { "epoch": 8.54, "learning_rate": 2.5117501841529297e-05, "loss": 0.1871, "step": 797500 }, { "epoch": 8.54, "learning_rate": 2.504573346475026e-05, "loss": 0.1874, "step": 798000 }, { "epoch": 8.54, "eval_loss": 0.17816244065761566, "eval_runtime": 2.6304, "eval_samples_per_second": 873.254, "eval_steps_per_second": 13.686, "step": 798000 }, { "epoch": 8.55, "learning_rate": 2.497411533083753e-05, "loss": 0.1874, "step": 798500 }, { "epoch": 8.55, "learning_rate": 2.4902647635592324e-05, "loss": 0.187, "step": 799000 }, { "epoch": 8.55, "eval_loss": 0.17790637910366058, "eval_runtime": 2.6765, "eval_samples_per_second": 858.201, "eval_steps_per_second": 13.45, "step": 799000 }, { "epoch": 8.56, "learning_rate": 2.483133057440458e-05, "loss": 0.1871, "step": 799500 }, { "epoch": 8.57, "learning_rate": 2.476016434225246e-05, "loss": 0.1872, "step": 800000 }, { "epoch": 8.57, "eval_loss": 0.17626047134399414, "eval_runtime": 2.6916, "eval_samples_per_second": 853.408, "eval_steps_per_second": 13.375, "step": 800000 }, { "epoch": 8.57, "learning_rate": 2.4689149133701672e-05, "loss": 0.1867, "step": 800500 }, { "epoch": 8.58, "learning_rate": 2.461828514290513e-05, "loss": 0.1869, "step": 801000 }, { "epoch": 8.58, "eval_loss": 0.17704518139362335, "eval_runtime": 2.7376, "eval_samples_per_second": 839.048, "eval_steps_per_second": 13.15, "step": 801000 }, { "epoch": 8.58, "learning_rate": 2.4547572563602267e-05, "loss": 0.1872, "step": 801500 }, { "epoch": 8.59, "learning_rate": 2.447701158911855e-05, "loss": 0.1868, "step": 802000 }, { "epoch": 8.59, "eval_loss": 0.17794357240200043, "eval_runtime": 2.6487, "eval_samples_per_second": 867.219, "eval_steps_per_second": 13.592, "step": 802000 }, { "epoch": 8.59, "learning_rate": 2.4406602412365027e-05, "loss": 0.187, "step": 802500 }, { "epoch": 8.6, "learning_rate": 2.4336345225837658e-05, "loss": 0.1872, "step": 803000 }, { "epoch": 8.6, "eval_loss": 0.1776154637336731, "eval_runtime": 2.7206, "eval_samples_per_second": 844.313, "eval_steps_per_second": 13.233, "step": 803000 }, { "epoch": 8.6, "learning_rate": 2.4266240221616956e-05, "loss": 0.1873, "step": 803500 }, { "epoch": 8.61, "learning_rate": 2.4196287591367296e-05, "loss": 0.1868, "step": 804000 }, { "epoch": 8.61, "eval_loss": 0.176628977060318, "eval_runtime": 2.6337, "eval_samples_per_second": 872.144, "eval_steps_per_second": 13.669, "step": 804000 }, { "epoch": 8.62, "learning_rate": 2.412648752633649e-05, "loss": 0.1869, "step": 804500 }, { "epoch": 8.62, "learning_rate": 2.405684021735527e-05, "loss": 0.1866, "step": 805000 }, { "epoch": 8.62, "eval_loss": 0.1774420291185379, "eval_runtime": 2.6036, "eval_samples_per_second": 882.223, "eval_steps_per_second": 13.827, "step": 805000 }, { "epoch": 8.63, "learning_rate": 2.39873458548367e-05, "loss": 0.1871, "step": 805500 }, { "epoch": 8.63, "learning_rate": 2.3918004628775736e-05, "loss": 0.1871, "step": 806000 }, { "epoch": 8.63, "eval_loss": 0.1766408383846283, "eval_runtime": 2.6553, "eval_samples_per_second": 865.055, "eval_steps_per_second": 13.558, "step": 806000 }, { "epoch": 8.64, "learning_rate": 2.3848816728748643e-05, "loss": 0.187, "step": 806500 }, { "epoch": 8.64, "learning_rate": 2.3779782343912463e-05, "loss": 0.1871, "step": 807000 }, { "epoch": 8.64, "eval_loss": 0.17578239738941193, "eval_runtime": 2.6261, "eval_samples_per_second": 874.674, "eval_steps_per_second": 13.708, "step": 807000 }, { "epoch": 8.65, "learning_rate": 2.3710901663004604e-05, "loss": 0.1867, "step": 807500 }, { "epoch": 8.65, "learning_rate": 2.364217487434221e-05, "loss": 0.1867, "step": 808000 }, { "epoch": 8.65, "eval_loss": 0.17686133086681366, "eval_runtime": 2.5799, "eval_samples_per_second": 890.356, "eval_steps_per_second": 13.954, "step": 808000 }, { "epoch": 8.66, "learning_rate": 2.3573602165821668e-05, "loss": 0.187, "step": 808500 }, { "epoch": 8.67, "learning_rate": 2.3505183724918196e-05, "loss": 0.1867, "step": 809000 }, { "epoch": 8.67, "eval_loss": 0.1787070780992508, "eval_runtime": 2.6677, "eval_samples_per_second": 861.032, "eval_steps_per_second": 13.495, "step": 809000 }, { "epoch": 8.67, "learning_rate": 2.3436919738685132e-05, "loss": 0.1864, "step": 809500 }, { "epoch": 8.68, "learning_rate": 2.3368810393753687e-05, "loss": 0.1866, "step": 810000 }, { "epoch": 8.68, "eval_loss": 0.17782823741436005, "eval_runtime": 2.6203, "eval_samples_per_second": 876.614, "eval_steps_per_second": 13.739, "step": 810000 }, { "epoch": 8.68, "learning_rate": 2.3300855876332162e-05, "loss": 0.1868, "step": 810500 }, { "epoch": 8.69, "learning_rate": 2.32330563722056e-05, "loss": 0.1864, "step": 811000 }, { "epoch": 8.69, "eval_loss": 0.1779273897409439, "eval_runtime": 2.7339, "eval_samples_per_second": 840.196, "eval_steps_per_second": 13.168, "step": 811000 }, { "epoch": 8.69, "learning_rate": 2.316541206673529e-05, "loss": 0.1865, "step": 811500 }, { "epoch": 8.7, "learning_rate": 2.309792314485815e-05, "loss": 0.1866, "step": 812000 }, { "epoch": 8.7, "eval_loss": 0.17686782777309418, "eval_runtime": 2.8998, "eval_samples_per_second": 792.124, "eval_steps_per_second": 12.415, "step": 812000 }, { "epoch": 8.7, "learning_rate": 2.3030589791086353e-05, "loss": 0.1865, "step": 812500 }, { "epoch": 8.71, "learning_rate": 2.2963412189506695e-05, "loss": 0.1869, "step": 813000 }, { "epoch": 8.71, "eval_loss": 0.1769571304321289, "eval_runtime": 2.6694, "eval_samples_per_second": 860.483, "eval_steps_per_second": 13.486, "step": 813000 }, { "epoch": 8.72, "learning_rate": 2.2896390523780156e-05, "loss": 0.1865, "step": 813500 }, { "epoch": 8.72, "learning_rate": 2.282952497714145e-05, "loss": 0.186, "step": 814000 }, { "epoch": 8.72, "eval_loss": 0.17855176329612732, "eval_runtime": 2.6874, "eval_samples_per_second": 854.735, "eval_steps_per_second": 13.396, "step": 814000 }, { "epoch": 8.73, "learning_rate": 2.2762815732398387e-05, "loss": 0.1862, "step": 814500 }, { "epoch": 8.73, "learning_rate": 2.2696262971931538e-05, "loss": 0.1863, "step": 815000 }, { "epoch": 8.73, "eval_loss": 0.17720898985862732, "eval_runtime": 2.6031, "eval_samples_per_second": 882.4, "eval_steps_per_second": 13.83, "step": 815000 }, { "epoch": 8.74, "learning_rate": 2.2629866877693577e-05, "loss": 0.1865, "step": 815500 }, { "epoch": 8.74, "learning_rate": 2.2563627631208887e-05, "loss": 0.1869, "step": 816000 }, { "epoch": 8.74, "eval_loss": 0.17861302196979523, "eval_runtime": 2.6611, "eval_samples_per_second": 863.188, "eval_steps_per_second": 13.528, "step": 816000 }, { "epoch": 8.75, "learning_rate": 2.2497545413573065e-05, "loss": 0.1863, "step": 816500 }, { "epoch": 8.75, "learning_rate": 2.2431620405452336e-05, "loss": 0.1859, "step": 817000 }, { "epoch": 8.75, "eval_loss": 0.17606213688850403, "eval_runtime": 2.6726, "eval_samples_per_second": 859.474, "eval_steps_per_second": 13.47, "step": 817000 }, { "epoch": 8.76, "learning_rate": 2.23658527870832e-05, "loss": 0.1863, "step": 817500 }, { "epoch": 8.77, "learning_rate": 2.230024273827179e-05, "loss": 0.1862, "step": 818000 }, { "epoch": 8.77, "eval_loss": 0.17465642094612122, "eval_runtime": 2.6831, "eval_samples_per_second": 856.102, "eval_steps_per_second": 13.417, "step": 818000 }, { "epoch": 8.77, "learning_rate": 2.223479043839345e-05, "loss": 0.1867, "step": 818500 }, { "epoch": 8.78, "learning_rate": 2.216949606639231e-05, "loss": 0.1863, "step": 819000 }, { "epoch": 8.78, "eval_loss": 0.17773117125034332, "eval_runtime": 2.6871, "eval_samples_per_second": 854.84, "eval_steps_per_second": 13.398, "step": 819000 }, { "epoch": 8.78, "learning_rate": 2.2104359800780665e-05, "loss": 0.1859, "step": 819500 }, { "epoch": 8.79, "learning_rate": 2.2039381819638596e-05, "loss": 0.186, "step": 820000 }, { "epoch": 8.79, "eval_loss": 0.1770503968000412, "eval_runtime": 2.6357, "eval_samples_per_second": 871.505, "eval_steps_per_second": 13.659, "step": 820000 }, { "epoch": 8.79, "learning_rate": 2.1974562300613417e-05, "loss": 0.186, "step": 820500 }, { "epoch": 8.8, "learning_rate": 2.1909901420919184e-05, "loss": 0.1856, "step": 821000 }, { "epoch": 8.8, "eval_loss": 0.17747129499912262, "eval_runtime": 2.664, "eval_samples_per_second": 862.246, "eval_steps_per_second": 13.514, "step": 821000 }, { "epoch": 8.8, "learning_rate": 2.1845399357336326e-05, "loss": 0.186, "step": 821500 }, { "epoch": 8.81, "learning_rate": 2.1781056286210997e-05, "loss": 0.186, "step": 822000 }, { "epoch": 8.81, "eval_loss": 0.1773909628391266, "eval_runtime": 2.5828, "eval_samples_per_second": 889.354, "eval_steps_per_second": 13.939, "step": 822000 }, { "epoch": 8.82, "learning_rate": 2.1716872383454674e-05, "loss": 0.1861, "step": 822500 }, { "epoch": 8.82, "learning_rate": 2.1652847824543744e-05, "loss": 0.1856, "step": 823000 }, { "epoch": 8.82, "eval_loss": 0.1759449690580368, "eval_runtime": 2.6867, "eval_samples_per_second": 854.948, "eval_steps_per_second": 13.399, "step": 823000 }, { "epoch": 8.83, "learning_rate": 2.1588982784518853e-05, "loss": 0.1862, "step": 823500 }, { "epoch": 8.83, "learning_rate": 2.1525277437984636e-05, "loss": 0.1857, "step": 824000 }, { "epoch": 8.83, "eval_loss": 0.1774652898311615, "eval_runtime": 2.6123, "eval_samples_per_second": 879.304, "eval_steps_per_second": 13.781, "step": 824000 }, { "epoch": 8.84, "learning_rate": 2.1461731959109053e-05, "loss": 0.186, "step": 824500 }, { "epoch": 8.84, "learning_rate": 2.1398346521623e-05, "loss": 0.1857, "step": 825000 }, { "epoch": 8.84, "eval_loss": 0.17699038982391357, "eval_runtime": 2.654, "eval_samples_per_second": 865.476, "eval_steps_per_second": 13.564, "step": 825000 }, { "epoch": 8.85, "learning_rate": 2.1335121298819867e-05, "loss": 0.1859, "step": 825500 }, { "epoch": 8.86, "learning_rate": 2.1272056463554978e-05, "loss": 0.1862, "step": 826000 }, { "epoch": 8.86, "eval_loss": 0.17693667113780975, "eval_runtime": 2.6428, "eval_samples_per_second": 869.15, "eval_steps_per_second": 13.622, "step": 826000 }, { "epoch": 8.86, "learning_rate": 2.1209152188245214e-05, "loss": 0.1858, "step": 826500 }, { "epoch": 8.87, "learning_rate": 2.114640864486845e-05, "loss": 0.1857, "step": 827000 }, { "epoch": 8.87, "eval_loss": 0.1788521409034729, "eval_runtime": 2.6742, "eval_samples_per_second": 858.952, "eval_steps_per_second": 13.462, "step": 827000 }, { "epoch": 8.87, "learning_rate": 2.1083826004963102e-05, "loss": 0.1859, "step": 827500 }, { "epoch": 8.88, "learning_rate": 2.1021404439627775e-05, "loss": 0.1855, "step": 828000 }, { "epoch": 8.88, "eval_loss": 0.17763476073741913, "eval_runtime": 2.5581, "eval_samples_per_second": 897.942, "eval_steps_per_second": 14.073, "step": 828000 }, { "epoch": 8.88, "learning_rate": 2.09591441195206e-05, "loss": 0.1856, "step": 828500 }, { "epoch": 8.89, "learning_rate": 2.089704521485896e-05, "loss": 0.1858, "step": 829000 }, { "epoch": 8.89, "eval_loss": 0.17711400985717773, "eval_runtime": 2.6039, "eval_samples_per_second": 882.145, "eval_steps_per_second": 13.826, "step": 829000 }, { "epoch": 8.89, "learning_rate": 2.083510789541883e-05, "loss": 0.1852, "step": 829500 }, { "epoch": 8.9, "learning_rate": 2.0773332330534513e-05, "loss": 0.1857, "step": 830000 }, { "epoch": 8.9, "eval_loss": 0.17438167333602905, "eval_runtime": 2.6569, "eval_samples_per_second": 864.555, "eval_steps_per_second": 13.55, "step": 830000 }, { "epoch": 8.91, "learning_rate": 2.0711718689098057e-05, "loss": 0.1858, "step": 830500 }, { "epoch": 8.91, "learning_rate": 2.0650267139558772e-05, "loss": 0.1854, "step": 831000 }, { "epoch": 8.91, "eval_loss": 0.17501012980937958, "eval_runtime": 2.6667, "eval_samples_per_second": 861.35, "eval_steps_per_second": 13.5, "step": 831000 }, { "epoch": 8.92, "learning_rate": 2.058897784992289e-05, "loss": 0.1855, "step": 831500 }, { "epoch": 8.92, "learning_rate": 2.052785098775293e-05, "loss": 0.1855, "step": 832000 }, { "epoch": 8.92, "eval_loss": 0.17379425466060638, "eval_runtime": 2.675, "eval_samples_per_second": 858.689, "eval_steps_per_second": 13.458, "step": 832000 }, { "epoch": 8.93, "learning_rate": 2.0466886720167436e-05, "loss": 0.1847, "step": 832500 }, { "epoch": 8.93, "learning_rate": 2.04060852138404e-05, "loss": 0.1854, "step": 833000 }, { "epoch": 8.93, "eval_loss": 0.1764400452375412, "eval_runtime": 2.6031, "eval_samples_per_second": 882.425, "eval_steps_per_second": 13.83, "step": 833000 }, { "epoch": 8.94, "learning_rate": 2.0345446635000783e-05, "loss": 0.1856, "step": 833500 }, { "epoch": 8.94, "learning_rate": 2.028497114943219e-05, "loss": 0.1851, "step": 834000 }, { "epoch": 8.94, "eval_loss": 0.17593778669834137, "eval_runtime": 2.5824, "eval_samples_per_second": 889.497, "eval_steps_per_second": 13.941, "step": 834000 }, { "epoch": 8.95, "learning_rate": 2.022465892247223e-05, "loss": 0.1855, "step": 834500 }, { "epoch": 8.96, "learning_rate": 2.0164510119012263e-05, "loss": 0.1849, "step": 835000 }, { "epoch": 8.96, "eval_loss": 0.1772100031375885, "eval_runtime": 2.6877, "eval_samples_per_second": 854.619, "eval_steps_per_second": 13.394, "step": 835000 }, { "epoch": 8.96, "learning_rate": 2.0104524903496834e-05, "loss": 0.1852, "step": 835500 }, { "epoch": 8.97, "learning_rate": 2.0044703439923217e-05, "loss": 0.1854, "step": 836000 }, { "epoch": 8.97, "eval_loss": 0.17744192481040955, "eval_runtime": 2.6114, "eval_samples_per_second": 879.619, "eval_steps_per_second": 13.786, "step": 836000 }, { "epoch": 8.97, "learning_rate": 1.998504589184101e-05, "loss": 0.1851, "step": 836500 }, { "epoch": 8.98, "learning_rate": 1.9925552422351654e-05, "loss": 0.1849, "step": 837000 }, { "epoch": 8.98, "eval_loss": 0.1755765676498413, "eval_runtime": 2.6326, "eval_samples_per_second": 872.529, "eval_steps_per_second": 13.675, "step": 837000 }, { "epoch": 8.98, "learning_rate": 1.9866223194108028e-05, "loss": 0.1851, "step": 837500 }, { "epoch": 8.99, "learning_rate": 1.9807058369314016e-05, "loss": 0.1845, "step": 838000 }, { "epoch": 8.99, "eval_loss": 0.17676672339439392, "eval_runtime": 2.6846, "eval_samples_per_second": 855.61, "eval_steps_per_second": 13.41, "step": 838000 }, { "epoch": 8.99, "learning_rate": 1.9748058109723953e-05, "loss": 0.1852, "step": 838500 }, { "epoch": 9.0, "learning_rate": 1.968922257664231e-05, "loss": 0.1853, "step": 839000 }, { "epoch": 9.0, "eval_loss": 0.17678546905517578, "eval_runtime": 2.6872, "eval_samples_per_second": 854.778, "eval_steps_per_second": 13.397, "step": 839000 }, { "epoch": 9.01, "learning_rate": 1.9630551930923155e-05, "loss": 0.1851, "step": 839500 }, { "epoch": 9.01, "learning_rate": 1.9572046332969825e-05, "loss": 0.1848, "step": 840000 }, { "epoch": 9.01, "eval_loss": 0.1751183122396469, "eval_runtime": 2.6372, "eval_samples_per_second": 870.984, "eval_steps_per_second": 13.651, "step": 840000 }, { "epoch": 9.02, "learning_rate": 1.95137059427344e-05, "loss": 0.1851, "step": 840500 }, { "epoch": 9.02, "learning_rate": 1.945553091971727e-05, "loss": 0.1852, "step": 841000 }, { "epoch": 9.02, "eval_loss": 0.17605267465114594, "eval_runtime": 2.6415, "eval_samples_per_second": 869.593, "eval_steps_per_second": 13.629, "step": 841000 }, { "epoch": 9.03, "learning_rate": 1.93975214229667e-05, "loss": 0.185, "step": 841500 }, { "epoch": 9.03, "learning_rate": 1.933967761107847e-05, "loss": 0.1846, "step": 842000 }, { "epoch": 9.03, "eval_loss": 0.17334681749343872, "eval_runtime": 2.679, "eval_samples_per_second": 857.418, "eval_steps_per_second": 13.438, "step": 842000 }, { "epoch": 9.04, "learning_rate": 1.928199964219533e-05, "loss": 0.1851, "step": 842500 }, { "epoch": 9.04, "learning_rate": 1.9224487674006694e-05, "loss": 0.1848, "step": 843000 }, { "epoch": 9.04, "eval_loss": 0.17702366411685944, "eval_runtime": 2.7205, "eval_samples_per_second": 844.336, "eval_steps_per_second": 13.233, "step": 843000 }, { "epoch": 9.05, "learning_rate": 1.9167141863748015e-05, "loss": 0.1848, "step": 843500 }, { "epoch": 9.06, "learning_rate": 1.9109962368200602e-05, "loss": 0.1846, "step": 844000 }, { "epoch": 9.06, "eval_loss": 0.17539054155349731, "eval_runtime": 2.6244, "eval_samples_per_second": 875.248, "eval_steps_per_second": 13.717, "step": 844000 }, { "epoch": 9.06, "learning_rate": 1.9052949343690977e-05, "loss": 0.1848, "step": 844500 }, { "epoch": 9.07, "learning_rate": 1.8996102946090586e-05, "loss": 0.1846, "step": 845000 }, { "epoch": 9.07, "eval_loss": 0.17318959534168243, "eval_runtime": 2.7201, "eval_samples_per_second": 844.445, "eval_steps_per_second": 13.235, "step": 845000 }, { "epoch": 9.07, "learning_rate": 1.8939423330815345e-05, "loss": 0.1847, "step": 845500 }, { "epoch": 9.08, "learning_rate": 1.888291065282509e-05, "loss": 0.1847, "step": 846000 }, { "epoch": 9.08, "eval_loss": 0.17560191452503204, "eval_runtime": 2.7101, "eval_samples_per_second": 847.565, "eval_steps_per_second": 13.284, "step": 846000 }, { "epoch": 9.08, "learning_rate": 1.882656506662338e-05, "loss": 0.1846, "step": 846500 }, { "epoch": 9.09, "learning_rate": 1.8770386726256865e-05, "loss": 0.1844, "step": 847000 }, { "epoch": 9.09, "eval_loss": 0.17197825014591217, "eval_runtime": 2.7201, "eval_samples_per_second": 844.459, "eval_steps_per_second": 13.235, "step": 847000 }, { "epoch": 9.09, "learning_rate": 1.8714375785315006e-05, "loss": 0.1845, "step": 847500 }, { "epoch": 9.1, "learning_rate": 1.8658532396929565e-05, "loss": 0.184, "step": 848000 }, { "epoch": 9.1, "eval_loss": 0.17653484642505646, "eval_runtime": 2.6616, "eval_samples_per_second": 863.031, "eval_steps_per_second": 13.526, "step": 848000 }, { "epoch": 9.11, "learning_rate": 1.8602856713774208e-05, "loss": 0.1843, "step": 848500 }, { "epoch": 9.11, "learning_rate": 1.8547348888064178e-05, "loss": 0.1848, "step": 849000 }, { "epoch": 9.11, "eval_loss": 0.1734277456998825, "eval_runtime": 2.6737, "eval_samples_per_second": 859.112, "eval_steps_per_second": 13.465, "step": 849000 }, { "epoch": 9.12, "learning_rate": 1.8492009071555703e-05, "loss": 0.1846, "step": 849500 }, { "epoch": 9.12, "learning_rate": 1.8436837415545772e-05, "loss": 0.1848, "step": 850000 }, { "epoch": 9.12, "eval_loss": 0.17425018548965454, "eval_runtime": 2.6779, "eval_samples_per_second": 857.76, "eval_steps_per_second": 13.443, "step": 850000 }, { "epoch": 9.13, "learning_rate": 1.838183407087156e-05, "loss": 0.1843, "step": 850500 }, { "epoch": 9.13, "learning_rate": 1.8326999187910095e-05, "loss": 0.1843, "step": 851000 }, { "epoch": 9.13, "eval_loss": 0.17367926239967346, "eval_runtime": 2.6854, "eval_samples_per_second": 855.351, "eval_steps_per_second": 13.406, "step": 851000 }, { "epoch": 9.14, "learning_rate": 1.8272332916577875e-05, "loss": 0.1846, "step": 851500 }, { "epoch": 9.15, "learning_rate": 1.8217835406330415e-05, "loss": 0.1844, "step": 852000 }, { "epoch": 9.15, "eval_loss": 0.17384441196918488, "eval_runtime": 2.6384, "eval_samples_per_second": 870.617, "eval_steps_per_second": 13.645, "step": 852000 }, { "epoch": 9.15, "learning_rate": 1.81635068061618e-05, "loss": 0.1844, "step": 852500 }, { "epoch": 9.16, "learning_rate": 1.810934726460436e-05, "loss": 0.1845, "step": 853000 }, { "epoch": 9.16, "eval_loss": 0.17389260232448578, "eval_runtime": 2.6711, "eval_samples_per_second": 859.942, "eval_steps_per_second": 13.478, "step": 853000 }, { "epoch": 9.16, "learning_rate": 1.80553569297282e-05, "loss": 0.1843, "step": 853500 }, { "epoch": 9.17, "learning_rate": 1.800153594914084e-05, "loss": 0.1843, "step": 854000 }, { "epoch": 9.17, "eval_loss": 0.17477978765964508, "eval_runtime": 2.681, "eval_samples_per_second": 856.769, "eval_steps_per_second": 13.428, "step": 854000 }, { "epoch": 9.17, "learning_rate": 1.7947884469986816e-05, "loss": 0.1841, "step": 854500 }, { "epoch": 9.18, "learning_rate": 1.7894402638947176e-05, "loss": 0.1841, "step": 855000 }, { "epoch": 9.18, "eval_loss": 0.1744370311498642, "eval_runtime": 2.6199, "eval_samples_per_second": 876.754, "eval_steps_per_second": 13.741, "step": 855000 }, { "epoch": 9.18, "learning_rate": 1.7841090602239237e-05, "loss": 0.1841, "step": 855500 }, { "epoch": 9.19, "learning_rate": 1.778794850561604e-05, "loss": 0.1844, "step": 856000 }, { "epoch": 9.19, "eval_loss": 0.17599613964557648, "eval_runtime": 2.6232, "eval_samples_per_second": 875.655, "eval_steps_per_second": 13.724, "step": 856000 }, { "epoch": 9.2, "learning_rate": 1.7734976494366073e-05, "loss": 0.1837, "step": 856500 }, { "epoch": 9.2, "learning_rate": 1.7682174713312805e-05, "loss": 0.1843, "step": 857000 }, { "epoch": 9.2, "eval_loss": 0.17385347187519073, "eval_runtime": 2.7089, "eval_samples_per_second": 847.948, "eval_steps_per_second": 13.29, "step": 857000 }, { "epoch": 9.21, "learning_rate": 1.7629543306814255e-05, "loss": 0.1838, "step": 857500 }, { "epoch": 9.21, "learning_rate": 1.75770824187627e-05, "loss": 0.1839, "step": 858000 }, { "epoch": 9.21, "eval_loss": 0.17458127439022064, "eval_runtime": 2.75, "eval_samples_per_second": 835.274, "eval_steps_per_second": 13.091, "step": 858000 }, { "epoch": 9.22, "learning_rate": 1.7524792192584186e-05, "loss": 0.1843, "step": 858500 }, { "epoch": 9.22, "learning_rate": 1.747267277123821e-05, "loss": 0.1839, "step": 859000 }, { "epoch": 9.22, "eval_loss": 0.1746589094400406, "eval_runtime": 2.6359, "eval_samples_per_second": 871.432, "eval_steps_per_second": 13.658, "step": 859000 }, { "epoch": 9.23, "learning_rate": 1.74207242972173e-05, "loss": 0.1837, "step": 859500 }, { "epoch": 9.23, "learning_rate": 1.7368946912546556e-05, "loss": 0.1836, "step": 860000 }, { "epoch": 9.23, "eval_loss": 0.1775263249874115, "eval_runtime": 2.6639, "eval_samples_per_second": 862.261, "eval_steps_per_second": 13.514, "step": 860000 }, { "epoch": 9.24, "learning_rate": 1.7317340758783407e-05, "loss": 0.1835, "step": 860500 }, { "epoch": 9.25, "learning_rate": 1.726590597701708e-05, "loss": 0.1842, "step": 861000 }, { "epoch": 9.25, "eval_loss": 0.17551767826080322, "eval_runtime": 2.6099, "eval_samples_per_second": 880.108, "eval_steps_per_second": 13.794, "step": 861000 }, { "epoch": 9.25, "learning_rate": 1.7214642707868325e-05, "loss": 0.1839, "step": 861500 }, { "epoch": 9.26, "learning_rate": 1.7163551091488952e-05, "loss": 0.1839, "step": 862000 }, { "epoch": 9.26, "eval_loss": 0.17372268438339233, "eval_runtime": 2.641, "eval_samples_per_second": 869.76, "eval_steps_per_second": 13.631, "step": 862000 }, { "epoch": 9.26, "learning_rate": 1.711263126756148e-05, "loss": 0.1841, "step": 862500 }, { "epoch": 9.27, "learning_rate": 1.7061883375298788e-05, "loss": 0.1834, "step": 863000 }, { "epoch": 9.27, "eval_loss": 0.17352163791656494, "eval_runtime": 2.6082, "eval_samples_per_second": 880.7, "eval_steps_per_second": 13.803, "step": 863000 }, { "epoch": 9.27, "learning_rate": 1.7011307553443647e-05, "loss": 0.1837, "step": 863500 }, { "epoch": 9.28, "learning_rate": 1.6960903940268456e-05, "loss": 0.1836, "step": 864000 }, { "epoch": 9.28, "eval_loss": 0.17270448803901672, "eval_runtime": 2.6998, "eval_samples_per_second": 850.79, "eval_steps_per_second": 13.334, "step": 864000 }, { "epoch": 9.28, "learning_rate": 1.6910672673574746e-05, "loss": 0.1838, "step": 864500 }, { "epoch": 9.29, "learning_rate": 1.6860613890692876e-05, "loss": 0.1832, "step": 865000 }, { "epoch": 9.29, "eval_loss": 0.1736259162425995, "eval_runtime": 2.674, "eval_samples_per_second": 859.019, "eval_steps_per_second": 13.463, "step": 865000 }, { "epoch": 9.3, "learning_rate": 1.6810727728481673e-05, "loss": 0.1833, "step": 865500 }, { "epoch": 9.3, "learning_rate": 1.6761014323327962e-05, "loss": 0.1835, "step": 866000 }, { "epoch": 9.3, "eval_loss": 0.17491458356380463, "eval_runtime": 2.6701, "eval_samples_per_second": 860.268, "eval_steps_per_second": 13.483, "step": 866000 }, { "epoch": 9.31, "learning_rate": 1.6711473811146333e-05, "loss": 0.1836, "step": 866500 }, { "epoch": 9.31, "learning_rate": 1.6662106327378645e-05, "loss": 0.1837, "step": 867000 }, { "epoch": 9.31, "eval_loss": 0.17311297357082367, "eval_runtime": 2.5749, "eval_samples_per_second": 892.083, "eval_steps_per_second": 13.981, "step": 867000 }, { "epoch": 9.32, "learning_rate": 1.6612912006993688e-05, "loss": 0.1835, "step": 867500 }, { "epoch": 9.32, "learning_rate": 1.6563890984486884e-05, "loss": 0.1834, "step": 868000 }, { "epoch": 9.32, "eval_loss": 0.1740087866783142, "eval_runtime": 2.6738, "eval_samples_per_second": 859.085, "eval_steps_per_second": 13.464, "step": 868000 }, { "epoch": 9.33, "learning_rate": 1.6515043393879825e-05, "loss": 0.1837, "step": 868500 }, { "epoch": 9.33, "learning_rate": 1.6466369368719955e-05, "loss": 0.1834, "step": 869000 }, { "epoch": 9.33, "eval_loss": 0.17570127546787262, "eval_runtime": 2.6358, "eval_samples_per_second": 871.447, "eval_steps_per_second": 13.658, "step": 869000 }, { "epoch": 9.34, "learning_rate": 1.641786904208022e-05, "loss": 0.1833, "step": 869500 }, { "epoch": 9.35, "learning_rate": 1.6369542546558626e-05, "loss": 0.1835, "step": 870000 }, { "epoch": 9.35, "eval_loss": 0.17456747591495514, "eval_runtime": 2.7306, "eval_samples_per_second": 841.196, "eval_steps_per_second": 13.184, "step": 870000 }, { "epoch": 9.35, "learning_rate": 1.6321390014277996e-05, "loss": 0.1831, "step": 870500 }, { "epoch": 9.36, "learning_rate": 1.6273411576885517e-05, "loss": 0.1836, "step": 871000 }, { "epoch": 9.36, "eval_loss": 0.17539818584918976, "eval_runtime": 2.6712, "eval_samples_per_second": 859.901, "eval_steps_per_second": 13.477, "step": 871000 }, { "epoch": 9.36, "learning_rate": 1.6225607365552378e-05, "loss": 0.1831, "step": 871500 }, { "epoch": 9.37, "learning_rate": 1.617797751097349e-05, "loss": 0.1832, "step": 872000 }, { "epoch": 9.37, "eval_loss": 0.1717691868543625, "eval_runtime": 2.6798, "eval_samples_per_second": 857.157, "eval_steps_per_second": 13.434, "step": 872000 }, { "epoch": 9.37, "learning_rate": 1.6130522143367032e-05, "loss": 0.1832, "step": 872500 }, { "epoch": 9.38, "learning_rate": 1.608324139247421e-05, "loss": 0.1835, "step": 873000 }, { "epoch": 9.38, "eval_loss": 0.1719122976064682, "eval_runtime": 2.6225, "eval_samples_per_second": 875.898, "eval_steps_per_second": 13.728, "step": 873000 }, { "epoch": 9.38, "learning_rate": 1.6036135387558756e-05, "loss": 0.1831, "step": 873500 }, { "epoch": 9.39, "learning_rate": 1.5989204257406693e-05, "loss": 0.1833, "step": 874000 }, { "epoch": 9.39, "eval_loss": 0.17478306591510773, "eval_runtime": 2.6101, "eval_samples_per_second": 880.046, "eval_steps_per_second": 13.793, "step": 874000 }, { "epoch": 9.4, "learning_rate": 1.594244813032595e-05, "loss": 0.1829, "step": 874500 }, { "epoch": 9.4, "learning_rate": 1.5895867134145974e-05, "loss": 0.1829, "step": 875000 }, { "epoch": 9.4, "eval_loss": 0.17394264042377472, "eval_runtime": 2.5878, "eval_samples_per_second": 887.623, "eval_steps_per_second": 13.911, "step": 875000 }, { "epoch": 9.41, "learning_rate": 1.5849461396217467e-05, "loss": 0.1834, "step": 875500 }, { "epoch": 9.41, "learning_rate": 1.5803231043411912e-05, "loss": 0.1827, "step": 876000 }, { "epoch": 9.41, "eval_loss": 0.17351944744586945, "eval_runtime": 2.6686, "eval_samples_per_second": 860.761, "eval_steps_per_second": 13.49, "step": 876000 }, { "epoch": 9.42, "learning_rate": 1.575717620212132e-05, "loss": 0.183, "step": 876500 }, { "epoch": 9.42, "learning_rate": 1.5711296998257902e-05, "loss": 0.1832, "step": 877000 }, { "epoch": 9.42, "eval_loss": 0.17347006499767303, "eval_runtime": 2.7428, "eval_samples_per_second": 837.47, "eval_steps_per_second": 13.125, "step": 877000 }, { "epoch": 9.43, "learning_rate": 1.5665593557253623e-05, "loss": 0.1833, "step": 877500 }, { "epoch": 9.44, "learning_rate": 1.562006600405996e-05, "loss": 0.1829, "step": 878000 }, { "epoch": 9.44, "eval_loss": 0.1734461635351181, "eval_runtime": 2.6113, "eval_samples_per_second": 879.646, "eval_steps_per_second": 13.786, "step": 878000 }, { "epoch": 9.44, "learning_rate": 1.5574714463147512e-05, "loss": 0.1831, "step": 878500 }, { "epoch": 9.45, "learning_rate": 1.5529539058505624e-05, "loss": 0.183, "step": 879000 }, { "epoch": 9.45, "eval_loss": 0.17375677824020386, "eval_runtime": 2.5315, "eval_samples_per_second": 907.374, "eval_steps_per_second": 14.221, "step": 879000 }, { "epoch": 9.45, "learning_rate": 1.5484539913642175e-05, "loss": 0.1826, "step": 879500 }, { "epoch": 9.46, "learning_rate": 1.543971715158307e-05, "loss": 0.1828, "step": 880000 }, { "epoch": 9.46, "eval_loss": 0.17431409657001495, "eval_runtime": 2.6398, "eval_samples_per_second": 870.144, "eval_steps_per_second": 13.637, "step": 880000 }, { "epoch": 9.46, "learning_rate": 1.539507089487205e-05, "loss": 0.183, "step": 880500 }, { "epoch": 9.47, "learning_rate": 1.535060126557028e-05, "loss": 0.1829, "step": 881000 }, { "epoch": 9.47, "eval_loss": 0.17408204078674316, "eval_runtime": 2.6439, "eval_samples_per_second": 868.809, "eval_steps_per_second": 13.617, "step": 881000 }, { "epoch": 9.47, "learning_rate": 1.5306308385255997e-05, "loss": 0.1827, "step": 881500 }, { "epoch": 9.48, "learning_rate": 1.5262192375024284e-05, "loss": 0.1827, "step": 882000 }, { "epoch": 9.48, "eval_loss": 0.17428572475910187, "eval_runtime": 2.6251, "eval_samples_per_second": 875.0, "eval_steps_per_second": 13.714, "step": 882000 }, { "epoch": 9.49, "learning_rate": 1.521825335548661e-05, "loss": 0.1832, "step": 882500 }, { "epoch": 9.49, "learning_rate": 1.5174491446770566e-05, "loss": 0.1827, "step": 883000 }, { "epoch": 9.49, "eval_loss": 0.17153075337409973, "eval_runtime": 2.6515, "eval_samples_per_second": 866.317, "eval_steps_per_second": 13.577, "step": 883000 }, { "epoch": 9.5, "learning_rate": 1.5130906768519563e-05, "loss": 0.1827, "step": 883500 }, { "epoch": 9.5, "learning_rate": 1.508749943989242e-05, "loss": 0.183, "step": 884000 }, { "epoch": 9.5, "eval_loss": 0.17301537096500397, "eval_runtime": 2.656, "eval_samples_per_second": 864.819, "eval_steps_per_second": 13.554, "step": 884000 }, { "epoch": 9.51, "learning_rate": 1.5044269579563144e-05, "loss": 0.1825, "step": 884500 }, { "epoch": 9.51, "learning_rate": 1.500121730572051e-05, "loss": 0.183, "step": 885000 }, { "epoch": 9.51, "eval_loss": 0.17374014854431152, "eval_runtime": 2.719, "eval_samples_per_second": 844.787, "eval_steps_per_second": 13.24, "step": 885000 }, { "epoch": 9.52, "learning_rate": 1.4958342736067783e-05, "loss": 0.1829, "step": 885500 }, { "epoch": 9.52, "learning_rate": 1.4915645987822406e-05, "loss": 0.1829, "step": 886000 }, { "epoch": 9.52, "eval_loss": 0.17604438960552216, "eval_runtime": 2.7026, "eval_samples_per_second": 849.921, "eval_steps_per_second": 13.32, "step": 886000 }, { "epoch": 9.53, "learning_rate": 1.4873127177715653e-05, "loss": 0.1827, "step": 886500 }, { "epoch": 9.54, "learning_rate": 1.4830786421992347e-05, "loss": 0.1829, "step": 887000 }, { "epoch": 9.54, "eval_loss": 0.17339639365673065, "eval_runtime": 2.7392, "eval_samples_per_second": 838.573, "eval_steps_per_second": 13.143, "step": 887000 }, { "epoch": 9.54, "learning_rate": 1.4788623836410479e-05, "loss": 0.1823, "step": 887500 }, { "epoch": 9.55, "learning_rate": 1.4746639536240942e-05, "loss": 0.1824, "step": 888000 }, { "epoch": 9.55, "eval_loss": 0.17382191121578217, "eval_runtime": 2.7435, "eval_samples_per_second": 837.254, "eval_steps_per_second": 13.122, "step": 888000 }, { "epoch": 9.55, "learning_rate": 1.4704833636267232e-05, "loss": 0.1825, "step": 888500 }, { "epoch": 9.56, "learning_rate": 1.4663206250785055e-05, "loss": 0.1824, "step": 889000 }, { "epoch": 9.56, "eval_loss": 0.17390523850917816, "eval_runtime": 2.7145, "eval_samples_per_second": 846.211, "eval_steps_per_second": 13.262, "step": 889000 }, { "epoch": 9.56, "learning_rate": 1.4621757493602125e-05, "loss": 0.1826, "step": 889500 }, { "epoch": 9.57, "learning_rate": 1.4580487478037748e-05, "loss": 0.1826, "step": 890000 }, { "epoch": 9.57, "eval_loss": 0.17268939316272736, "eval_runtime": 2.6865, "eval_samples_per_second": 855.002, "eval_steps_per_second": 13.4, "step": 890000 }, { "epoch": 9.57, "learning_rate": 1.4539396316922552e-05, "loss": 0.1822, "step": 890500 }, { "epoch": 9.58, "learning_rate": 1.4498484122598232e-05, "loss": 0.1824, "step": 891000 }, { "epoch": 9.58, "eval_loss": 0.17072877287864685, "eval_runtime": 2.7539, "eval_samples_per_second": 834.091, "eval_steps_per_second": 13.072, "step": 891000 }, { "epoch": 9.59, "learning_rate": 1.4457751006917137e-05, "loss": 0.1822, "step": 891500 }, { "epoch": 9.59, "learning_rate": 1.4417197081242083e-05, "loss": 0.1829, "step": 892000 }, { "epoch": 9.59, "eval_loss": 0.17135965824127197, "eval_runtime": 2.7156, "eval_samples_per_second": 845.864, "eval_steps_per_second": 13.257, "step": 892000 }, { "epoch": 9.6, "learning_rate": 1.4376822456445926e-05, "loss": 0.1825, "step": 892500 }, { "epoch": 9.6, "learning_rate": 1.433662724291136e-05, "loss": 0.1828, "step": 893000 }, { "epoch": 9.6, "eval_loss": 0.17201776802539825, "eval_runtime": 2.6531, "eval_samples_per_second": 865.779, "eval_steps_per_second": 13.569, "step": 893000 }, { "epoch": 9.61, "learning_rate": 1.4296611550530563e-05, "loss": 0.1823, "step": 893500 }, { "epoch": 9.61, "learning_rate": 1.4256775488704904e-05, "loss": 0.1825, "step": 894000 }, { "epoch": 9.61, "eval_loss": 0.17262525856494904, "eval_runtime": 2.6435, "eval_samples_per_second": 868.939, "eval_steps_per_second": 13.619, "step": 894000 }, { "epoch": 9.62, "learning_rate": 1.4217119166344665e-05, "loss": 0.1822, "step": 894500 }, { "epoch": 9.62, "learning_rate": 1.4177642691868717e-05, "loss": 0.1819, "step": 895000 }, { "epoch": 9.62, "eval_loss": 0.1716921329498291, "eval_runtime": 2.6607, "eval_samples_per_second": 863.304, "eval_steps_per_second": 13.53, "step": 895000 }, { "epoch": 9.63, "learning_rate": 1.4138346173204218e-05, "loss": 0.1823, "step": 895500 }, { "epoch": 9.64, "learning_rate": 1.4099229717786368e-05, "loss": 0.1825, "step": 896000 }, { "epoch": 9.64, "eval_loss": 0.1738402545452118, "eval_runtime": 2.7328, "eval_samples_per_second": 840.538, "eval_steps_per_second": 13.173, "step": 896000 }, { "epoch": 9.64, "learning_rate": 1.406029343255806e-05, "loss": 0.1823, "step": 896500 }, { "epoch": 9.65, "learning_rate": 1.4021537423969588e-05, "loss": 0.1823, "step": 897000 }, { "epoch": 9.65, "eval_loss": 0.1751101166009903, "eval_runtime": 2.7169, "eval_samples_per_second": 845.459, "eval_steps_per_second": 13.251, "step": 897000 }, { "epoch": 9.65, "learning_rate": 1.3982961797978431e-05, "loss": 0.1819, "step": 897500 }, { "epoch": 9.66, "learning_rate": 1.3944566660048863e-05, "loss": 0.1823, "step": 898000 }, { "epoch": 9.66, "eval_loss": 0.1745595484972, "eval_runtime": 2.78, "eval_samples_per_second": 826.25, "eval_steps_per_second": 12.95, "step": 898000 }, { "epoch": 9.66, "learning_rate": 1.3906352115151725e-05, "loss": 0.1821, "step": 898500 }, { "epoch": 9.67, "learning_rate": 1.3868318267764128e-05, "loss": 0.1821, "step": 899000 }, { "epoch": 9.67, "eval_loss": 0.17514048516750336, "eval_runtime": 2.6917, "eval_samples_per_second": 853.365, "eval_steps_per_second": 13.374, "step": 899000 }, { "epoch": 9.67, "learning_rate": 1.3830465221869146e-05, "loss": 0.1821, "step": 899500 }, { "epoch": 9.68, "learning_rate": 1.3792793080955574e-05, "loss": 0.1819, "step": 900000 }, { "epoch": 9.68, "eval_loss": 0.17348013818264008, "eval_runtime": 2.6872, "eval_samples_per_second": 854.803, "eval_steps_per_second": 13.397, "step": 900000 }, { "epoch": 9.69, "learning_rate": 1.3755301948017599e-05, "loss": 0.1822, "step": 900500 }, { "epoch": 9.69, "learning_rate": 1.3717991925554562e-05, "loss": 0.1821, "step": 901000 }, { "epoch": 9.69, "eval_loss": 0.17332369089126587, "eval_runtime": 2.5587, "eval_samples_per_second": 897.731, "eval_steps_per_second": 14.07, "step": 901000 }, { "epoch": 9.7, "learning_rate": 1.368086311557062e-05, "loss": 0.1821, "step": 901500 }, { "epoch": 9.7, "learning_rate": 1.3643915619574529e-05, "loss": 0.1821, "step": 902000 }, { "epoch": 9.7, "eval_loss": 0.1713598072528839, "eval_runtime": 2.7303, "eval_samples_per_second": 841.295, "eval_steps_per_second": 13.185, "step": 902000 }, { "epoch": 9.71, "learning_rate": 1.3607149538579341e-05, "loss": 0.182, "step": 902500 }, { "epoch": 9.71, "learning_rate": 1.35705649731021e-05, "loss": 0.1816, "step": 903000 }, { "epoch": 9.71, "eval_loss": 0.17178992927074432, "eval_runtime": 2.6316, "eval_samples_per_second": 872.853, "eval_steps_per_second": 13.68, "step": 903000 }, { "epoch": 9.72, "learning_rate": 1.3534162023163642e-05, "loss": 0.1821, "step": 903500 }, { "epoch": 9.73, "learning_rate": 1.3497940788288195e-05, "loss": 0.182, "step": 904000 }, { "epoch": 9.73, "eval_loss": 0.17401227355003357, "eval_runtime": 2.6109, "eval_samples_per_second": 879.785, "eval_steps_per_second": 13.789, "step": 904000 }, { "epoch": 9.73, "learning_rate": 1.3461901367503262e-05, "loss": 0.1816, "step": 904500 }, { "epoch": 9.74, "learning_rate": 1.3426043859339253e-05, "loss": 0.1822, "step": 905000 }, { "epoch": 9.74, "eval_loss": 0.16998076438903809, "eval_runtime": 2.6306, "eval_samples_per_second": 873.188, "eval_steps_per_second": 13.685, "step": 905000 }, { "epoch": 9.74, "learning_rate": 1.3390368361829197e-05, "loss": 0.1818, "step": 905500 }, { "epoch": 9.75, "learning_rate": 1.3354874972508582e-05, "loss": 0.1815, "step": 906000 }, { "epoch": 9.75, "eval_loss": 0.17435437440872192, "eval_runtime": 2.7558, "eval_samples_per_second": 833.52, "eval_steps_per_second": 13.063, "step": 906000 }, { "epoch": 9.75, "learning_rate": 1.3319563788414934e-05, "loss": 0.182, "step": 906500 }, { "epoch": 9.76, "learning_rate": 1.3284434906087695e-05, "loss": 0.1823, "step": 907000 }, { "epoch": 9.76, "eval_loss": 0.17397646605968475, "eval_runtime": 2.6707, "eval_samples_per_second": 860.067, "eval_steps_per_second": 13.479, "step": 907000 }, { "epoch": 9.76, "learning_rate": 1.3249488421567911e-05, "loss": 0.1818, "step": 907500 }, { "epoch": 9.77, "learning_rate": 1.3214724430397915e-05, "loss": 0.1817, "step": 908000 }, { "epoch": 9.77, "eval_loss": 0.17298473417758942, "eval_runtime": 2.659, "eval_samples_per_second": 863.865, "eval_steps_per_second": 13.539, "step": 908000 }, { "epoch": 9.78, "learning_rate": 1.3180143027621145e-05, "loss": 0.1819, "step": 908500 }, { "epoch": 9.78, "learning_rate": 1.314574430778182e-05, "loss": 0.1817, "step": 909000 }, { "epoch": 9.78, "eval_loss": 0.17148981988430023, "eval_runtime": 2.7078, "eval_samples_per_second": 848.287, "eval_steps_per_second": 13.295, "step": 909000 }, { "epoch": 9.79, "learning_rate": 1.311152836492473e-05, "loss": 0.1817, "step": 909500 }, { "epoch": 9.79, "learning_rate": 1.3077495292594966e-05, "loss": 0.1817, "step": 910000 }, { "epoch": 9.79, "eval_loss": 0.17322474718093872, "eval_runtime": 2.6678, "eval_samples_per_second": 861.024, "eval_steps_per_second": 13.495, "step": 910000 }, { "epoch": 9.8, "learning_rate": 1.3043645183837645e-05, "loss": 0.1817, "step": 910500 }, { "epoch": 9.8, "learning_rate": 1.3009978131197669e-05, "loss": 0.1821, "step": 911000 }, { "epoch": 9.8, "eval_loss": 0.17097479104995728, "eval_runtime": 2.7046, "eval_samples_per_second": 849.303, "eval_steps_per_second": 13.311, "step": 911000 }, { "epoch": 9.81, "learning_rate": 1.297649422671947e-05, "loss": 0.1815, "step": 911500 }, { "epoch": 9.81, "learning_rate": 1.2943193561946762e-05, "loss": 0.1814, "step": 912000 }, { "epoch": 9.81, "eval_loss": 0.1718176305294037, "eval_runtime": 2.6645, "eval_samples_per_second": 862.075, "eval_steps_per_second": 13.511, "step": 912000 }, { "epoch": 9.82, "learning_rate": 1.291007622792231e-05, "loss": 0.1816, "step": 912500 }, { "epoch": 9.83, "learning_rate": 1.2877142315187628e-05, "loss": 0.1818, "step": 913000 }, { "epoch": 9.83, "eval_loss": 0.1710846871137619, "eval_runtime": 2.7026, "eval_samples_per_second": 849.924, "eval_steps_per_second": 13.321, "step": 913000 }, { "epoch": 9.83, "learning_rate": 1.2844391913782773e-05, "loss": 0.182, "step": 913500 }, { "epoch": 9.84, "learning_rate": 1.28118251132461e-05, "loss": 0.1815, "step": 914000 }, { "epoch": 9.84, "eval_loss": 0.17333008348941803, "eval_runtime": 2.5502, "eval_samples_per_second": 900.72, "eval_steps_per_second": 14.117, "step": 914000 }, { "epoch": 9.84, "learning_rate": 1.2779442002613984e-05, "loss": 0.1814, "step": 914500 }, { "epoch": 9.85, "learning_rate": 1.274724267042063e-05, "loss": 0.1814, "step": 915000 }, { "epoch": 9.85, "eval_loss": 0.1717572808265686, "eval_runtime": 2.636, "eval_samples_per_second": 871.395, "eval_steps_per_second": 13.657, "step": 915000 }, { "epoch": 9.85, "learning_rate": 1.2715227204697775e-05, "loss": 0.1814, "step": 915500 }, { "epoch": 9.86, "learning_rate": 1.2683395692974472e-05, "loss": 0.1819, "step": 916000 }, { "epoch": 9.86, "eval_loss": 0.17225094139575958, "eval_runtime": 2.6373, "eval_samples_per_second": 870.95, "eval_steps_per_second": 13.65, "step": 916000 }, { "epoch": 9.86, "learning_rate": 1.2651748222276879e-05, "loss": 0.1815, "step": 916500 }, { "epoch": 9.87, "learning_rate": 1.2620284879127947e-05, "loss": 0.1816, "step": 917000 }, { "epoch": 9.87, "eval_loss": 0.17146611213684082, "eval_runtime": 2.7493, "eval_samples_per_second": 835.499, "eval_steps_per_second": 13.094, "step": 917000 }, { "epoch": 9.88, "learning_rate": 1.2589005749547281e-05, "loss": 0.1815, "step": 917500 }, { "epoch": 9.88, "learning_rate": 1.2557910919050803e-05, "loss": 0.1813, "step": 918000 }, { "epoch": 9.88, "eval_loss": 0.16947948932647705, "eval_runtime": 2.7503, "eval_samples_per_second": 835.189, "eval_steps_per_second": 13.09, "step": 918000 }, { "epoch": 9.89, "learning_rate": 1.2527000472650597e-05, "loss": 0.1815, "step": 918500 }, { "epoch": 9.89, "learning_rate": 1.2496274494854666e-05, "loss": 0.1812, "step": 919000 }, { "epoch": 9.89, "eval_loss": 0.17127934098243713, "eval_runtime": 2.6734, "eval_samples_per_second": 859.205, "eval_steps_per_second": 13.466, "step": 919000 }, { "epoch": 9.9, "learning_rate": 1.2465733069666629e-05, "loss": 0.1813, "step": 919500 }, { "epoch": 9.9, "learning_rate": 1.24353762805856e-05, "loss": 0.1814, "step": 920000 }, { "epoch": 9.9, "eval_loss": 0.17229017615318298, "eval_runtime": 2.6708, "eval_samples_per_second": 860.053, "eval_steps_per_second": 13.479, "step": 920000 }, { "epoch": 9.91, "learning_rate": 1.240520421060586e-05, "loss": 0.1818, "step": 920500 }, { "epoch": 9.91, "learning_rate": 1.2375216942216713e-05, "loss": 0.1817, "step": 921000 }, { "epoch": 9.91, "eval_loss": 0.17294897139072418, "eval_runtime": 2.7216, "eval_samples_per_second": 843.974, "eval_steps_per_second": 13.227, "step": 921000 }, { "epoch": 9.92, "learning_rate": 1.2345414557402198e-05, "loss": 0.1811, "step": 921500 }, { "epoch": 9.93, "learning_rate": 1.2315797137640906e-05, "loss": 0.1814, "step": 922000 }, { "epoch": 9.93, "eval_loss": 0.1728929877281189, "eval_runtime": 2.6867, "eval_samples_per_second": 854.941, "eval_steps_per_second": 13.399, "step": 922000 }, { "epoch": 9.93, "learning_rate": 1.2286364763905723e-05, "loss": 0.1813, "step": 922500 }, { "epoch": 9.94, "learning_rate": 1.225711751666363e-05, "loss": 0.1815, "step": 923000 }, { "epoch": 9.94, "eval_loss": 0.17273372411727905, "eval_runtime": 2.6851, "eval_samples_per_second": 855.459, "eval_steps_per_second": 13.407, "step": 923000 }, { "epoch": 9.94, "learning_rate": 1.2228055475875488e-05, "loss": 0.1811, "step": 923500 }, { "epoch": 9.95, "learning_rate": 1.2199178720995825e-05, "loss": 0.1814, "step": 924000 }, { "epoch": 9.95, "eval_loss": 0.17107011377811432, "eval_runtime": 2.7626, "eval_samples_per_second": 831.466, "eval_steps_per_second": 13.031, "step": 924000 }, { "epoch": 9.95, "learning_rate": 1.217048733097256e-05, "loss": 0.181, "step": 924500 }, { "epoch": 9.96, "learning_rate": 1.2141981384246874e-05, "loss": 0.1808, "step": 925000 }, { "epoch": 9.96, "eval_loss": 0.17153695225715637, "eval_runtime": 2.6072, "eval_samples_per_second": 881.021, "eval_steps_per_second": 13.808, "step": 925000 }, { "epoch": 9.96, "learning_rate": 1.211366095875293e-05, "loss": 0.1811, "step": 925500 }, { "epoch": 9.97, "learning_rate": 1.2085526131917685e-05, "loss": 0.181, "step": 926000 }, { "epoch": 9.97, "eval_loss": 0.1717982143163681, "eval_runtime": 2.6346, "eval_samples_per_second": 871.846, "eval_steps_per_second": 13.664, "step": 926000 }, { "epoch": 9.98, "learning_rate": 1.2057576980660691e-05, "loss": 0.181, "step": 926500 }, { "epoch": 9.98, "learning_rate": 1.2029813581393866e-05, "loss": 0.1813, "step": 927000 }, { "epoch": 9.98, "eval_loss": 0.17178404331207275, "eval_runtime": 2.6835, "eval_samples_per_second": 855.987, "eval_steps_per_second": 13.416, "step": 927000 }, { "epoch": 9.99, "learning_rate": 1.2002236010021269e-05, "loss": 0.1811, "step": 927500 }, { "epoch": 9.99, "learning_rate": 1.197484434193893e-05, "loss": 0.1811, "step": 928000 }, { "epoch": 9.99, "eval_loss": 0.17153653502464294, "eval_runtime": 2.6987, "eval_samples_per_second": 851.135, "eval_steps_per_second": 13.34, "step": 928000 }, { "epoch": 10.0, "learning_rate": 1.1947638652034617e-05, "loss": 0.1809, "step": 928500 }, { "epoch": 10.0, "learning_rate": 1.192061901468768e-05, "loss": 0.1811, "step": 929000 }, { "epoch": 10.0, "eval_loss": 0.17235822975635529, "eval_runtime": 2.6438, "eval_samples_per_second": 868.809, "eval_steps_per_second": 13.617, "step": 929000 }, { "epoch": 10.01, "learning_rate": 1.1893785503768736e-05, "loss": 0.1812, "step": 929500 }, { "epoch": 10.02, "learning_rate": 1.1867138192639601e-05, "loss": 0.1809, "step": 930000 }, { "epoch": 10.02, "eval_loss": 0.1709609031677246, "eval_runtime": 2.6178, "eval_samples_per_second": 877.459, "eval_steps_per_second": 13.752, "step": 930000 }, { "epoch": 10.02, "learning_rate": 1.1840677154152987e-05, "loss": 0.181, "step": 930500 }, { "epoch": 10.03, "learning_rate": 1.1814402460652382e-05, "loss": 0.1811, "step": 931000 }, { "epoch": 10.03, "eval_loss": 0.17144934833049774, "eval_runtime": 2.7383, "eval_samples_per_second": 838.849, "eval_steps_per_second": 13.147, "step": 931000 }, { "epoch": 10.03, "learning_rate": 1.178831418397181e-05, "loss": 0.181, "step": 931500 }, { "epoch": 10.04, "learning_rate": 1.176241239543558e-05, "loss": 0.181, "step": 932000 }, { "epoch": 10.04, "eval_loss": 0.17210912704467773, "eval_runtime": 2.6312, "eval_samples_per_second": 872.975, "eval_steps_per_second": 13.682, "step": 932000 }, { "epoch": 10.04, "learning_rate": 1.173669716585822e-05, "loss": 0.1809, "step": 932500 }, { "epoch": 10.05, "learning_rate": 1.171116856554418e-05, "loss": 0.1809, "step": 933000 }, { "epoch": 10.05, "eval_loss": 0.17279262840747833, "eval_runtime": 2.687, "eval_samples_per_second": 854.858, "eval_steps_per_second": 13.398, "step": 933000 }, { "epoch": 10.05, "learning_rate": 1.168582666428768e-05, "loss": 0.1809, "step": 933500 }, { "epoch": 10.06, "learning_rate": 1.1660671531372517e-05, "loss": 0.1807, "step": 934000 }, { "epoch": 10.06, "eval_loss": 0.17214839160442352, "eval_runtime": 2.6862, "eval_samples_per_second": 855.103, "eval_steps_per_second": 13.402, "step": 934000 }, { "epoch": 10.07, "learning_rate": 1.1635703235571846e-05, "loss": 0.181, "step": 934500 }, { "epoch": 10.07, "learning_rate": 1.1610921845148052e-05, "loss": 0.1805, "step": 935000 }, { "epoch": 10.07, "eval_loss": 0.17261220514774323, "eval_runtime": 2.7622, "eval_samples_per_second": 831.593, "eval_steps_per_second": 13.033, "step": 935000 }, { "epoch": 10.08, "learning_rate": 1.1586327427852503e-05, "loss": 0.1805, "step": 935500 }, { "epoch": 10.08, "learning_rate": 1.156192005092539e-05, "loss": 0.1807, "step": 936000 }, { "epoch": 10.08, "eval_loss": 0.17041905224323273, "eval_runtime": 2.643, "eval_samples_per_second": 869.101, "eval_steps_per_second": 13.621, "step": 936000 }, { "epoch": 10.09, "learning_rate": 1.153769978109557e-05, "loss": 0.1806, "step": 936500 }, { "epoch": 10.09, "learning_rate": 1.1513666684580308e-05, "loss": 0.1809, "step": 937000 }, { "epoch": 10.09, "eval_loss": 0.1718713790178299, "eval_runtime": 2.6411, "eval_samples_per_second": 869.707, "eval_steps_per_second": 13.631, "step": 937000 }, { "epoch": 10.1, "learning_rate": 1.1489820827085185e-05, "loss": 0.1808, "step": 937500 }, { "epoch": 10.1, "learning_rate": 1.1466162273803876e-05, "loss": 0.1809, "step": 938000 }, { "epoch": 10.1, "eval_loss": 0.17236891388893127, "eval_runtime": 2.4881, "eval_samples_per_second": 923.197, "eval_steps_per_second": 14.469, "step": 938000 }, { "epoch": 10.11, "learning_rate": 1.144269108941795e-05, "loss": 0.1808, "step": 938500 }, { "epoch": 10.12, "learning_rate": 1.1419407338096732e-05, "loss": 0.1807, "step": 939000 }, { "epoch": 10.12, "eval_loss": 0.17213864624500275, "eval_runtime": 2.697, "eval_samples_per_second": 851.681, "eval_steps_per_second": 13.348, "step": 939000 }, { "epoch": 10.12, "learning_rate": 1.1396311083497103e-05, "loss": 0.1808, "step": 939500 }, { "epoch": 10.13, "learning_rate": 1.1373402388763346e-05, "loss": 0.1806, "step": 940000 }, { "epoch": 10.13, "eval_loss": 0.17225030064582825, "eval_runtime": 2.5852, "eval_samples_per_second": 888.512, "eval_steps_per_second": 13.925, "step": 940000 }, { "epoch": 10.13, "learning_rate": 1.1350681316526965e-05, "loss": 0.1805, "step": 940500 }, { "epoch": 10.14, "learning_rate": 1.1328147928906494e-05, "loss": 0.1809, "step": 941000 }, { "epoch": 10.14, "eval_loss": 0.1728110909461975, "eval_runtime": 2.6803, "eval_samples_per_second": 857.002, "eval_steps_per_second": 13.431, "step": 941000 }, { "epoch": 10.14, "learning_rate": 1.1305802287507358e-05, "loss": 0.1806, "step": 941500 }, { "epoch": 10.15, "learning_rate": 1.1283644453421678e-05, "loss": 0.1806, "step": 942000 }, { "epoch": 10.15, "eval_loss": 0.17073097825050354, "eval_runtime": 2.5804, "eval_samples_per_second": 890.183, "eval_steps_per_second": 13.951, "step": 942000 }, { "epoch": 10.15, "learning_rate": 1.1261674487228149e-05, "loss": 0.1805, "step": 942500 }, { "epoch": 10.16, "learning_rate": 1.1239892448991798e-05, "loss": 0.1806, "step": 943000 }, { "epoch": 10.16, "eval_loss": 0.17256046831607819, "eval_runtime": 2.657, "eval_samples_per_second": 864.515, "eval_steps_per_second": 13.549, "step": 943000 }, { "epoch": 10.17, "learning_rate": 1.1218298398263894e-05, "loss": 0.1808, "step": 943500 }, { "epoch": 10.17, "learning_rate": 1.1196892394081743e-05, "loss": 0.1803, "step": 944000 }, { "epoch": 10.17, "eval_loss": 0.1697072833776474, "eval_runtime": 2.5585, "eval_samples_per_second": 897.801, "eval_steps_per_second": 14.071, "step": 944000 }, { "epoch": 10.18, "learning_rate": 1.1175674494968552e-05, "loss": 0.1803, "step": 944500 }, { "epoch": 10.18, "learning_rate": 1.1154644758933235e-05, "loss": 0.1807, "step": 945000 }, { "epoch": 10.18, "eval_loss": 0.17261387407779694, "eval_runtime": 2.6868, "eval_samples_per_second": 854.925, "eval_steps_per_second": 13.399, "step": 945000 }, { "epoch": 10.19, "learning_rate": 1.11338032434703e-05, "loss": 0.1804, "step": 945500 }, { "epoch": 10.19, "learning_rate": 1.1113150005559644e-05, "loss": 0.1808, "step": 946000 }, { "epoch": 10.19, "eval_loss": 0.17092828452587128, "eval_runtime": 2.6611, "eval_samples_per_second": 863.19, "eval_steps_per_second": 13.528, "step": 946000 }, { "epoch": 10.2, "learning_rate": 1.1092685101666438e-05, "loss": 0.1806, "step": 946500 }, { "epoch": 10.2, "learning_rate": 1.1072408587740942e-05, "loss": 0.1804, "step": 947000 }, { "epoch": 10.2, "eval_loss": 0.17135068774223328, "eval_runtime": 2.7301, "eval_samples_per_second": 841.37, "eval_steps_per_second": 13.186, "step": 947000 }, { "epoch": 10.21, "learning_rate": 1.1052320519218383e-05, "loss": 0.1804, "step": 947500 }, { "epoch": 10.22, "learning_rate": 1.1032420951018755e-05, "loss": 0.1806, "step": 948000 }, { "epoch": 10.22, "eval_loss": 0.16970402002334595, "eval_runtime": 2.5839, "eval_samples_per_second": 888.964, "eval_steps_per_second": 13.932, "step": 948000 }, { "epoch": 10.22, "learning_rate": 1.1012709937546722e-05, "loss": 0.1805, "step": 948500 }, { "epoch": 10.23, "learning_rate": 1.0993187532691458e-05, "loss": 0.1804, "step": 949000 }, { "epoch": 10.23, "eval_loss": 0.17099051177501678, "eval_runtime": 2.7073, "eval_samples_per_second": 848.444, "eval_steps_per_second": 13.297, "step": 949000 }, { "epoch": 10.23, "learning_rate": 1.0973853789826454e-05, "loss": 0.1804, "step": 949500 }, { "epoch": 10.24, "learning_rate": 1.0954708761809438e-05, "loss": 0.1806, "step": 950000 }, { "epoch": 10.24, "eval_loss": 0.1725110560655594, "eval_runtime": 2.6133, "eval_samples_per_second": 878.965, "eval_steps_per_second": 13.776, "step": 950000 }, { "epoch": 10.24, "learning_rate": 1.0935752500982175e-05, "loss": 0.1805, "step": 950500 }, { "epoch": 10.25, "learning_rate": 1.091698505917036e-05, "loss": 0.1804, "step": 951000 }, { "epoch": 10.25, "eval_loss": 0.1698637306690216, "eval_runtime": 2.5965, "eval_samples_per_second": 884.656, "eval_steps_per_second": 13.865, "step": 951000 }, { "epoch": 10.25, "learning_rate": 1.0898406487683472e-05, "loss": 0.1805, "step": 951500 }, { "epoch": 10.26, "learning_rate": 1.0880016837314599e-05, "loss": 0.1803, "step": 952000 }, { "epoch": 10.26, "eval_loss": 0.17085076868534088, "eval_runtime": 2.596, "eval_samples_per_second": 884.806, "eval_steps_per_second": 13.867, "step": 952000 }, { "epoch": 10.27, "learning_rate": 1.0861816158340365e-05, "loss": 0.1807, "step": 952500 }, { "epoch": 10.27, "learning_rate": 1.084380450052071e-05, "loss": 0.1803, "step": 953000 }, { "epoch": 10.27, "eval_loss": 0.17190536856651306, "eval_runtime": 2.595, "eval_samples_per_second": 885.153, "eval_steps_per_second": 13.873, "step": 953000 }, { "epoch": 10.28, "learning_rate": 1.0825981913098828e-05, "loss": 0.1799, "step": 953500 }, { "epoch": 10.28, "learning_rate": 1.0808348444801e-05, "loss": 0.1802, "step": 954000 }, { "epoch": 10.28, "eval_loss": 0.16949187219142914, "eval_runtime": 2.6166, "eval_samples_per_second": 877.869, "eval_steps_per_second": 13.758, "step": 954000 }, { "epoch": 10.29, "learning_rate": 1.0790904143836438e-05, "loss": 0.1804, "step": 954500 }, { "epoch": 10.29, "learning_rate": 1.0773649057897206e-05, "loss": 0.1802, "step": 955000 }, { "epoch": 10.29, "eval_loss": 0.16995471715927124, "eval_runtime": 2.6165, "eval_samples_per_second": 877.886, "eval_steps_per_second": 13.759, "step": 955000 }, { "epoch": 10.3, "learning_rate": 1.0756583234158057e-05, "loss": 0.1799, "step": 955500 }, { "epoch": 10.31, "learning_rate": 1.073970671927628e-05, "loss": 0.1802, "step": 956000 }, { "epoch": 10.31, "eval_loss": 0.17191793024539948, "eval_runtime": 2.6164, "eval_samples_per_second": 877.931, "eval_steps_per_second": 13.759, "step": 956000 }, { "epoch": 10.31, "learning_rate": 1.0723019559391643e-05, "loss": 0.1804, "step": 956500 }, { "epoch": 10.32, "learning_rate": 1.0706521800126198e-05, "loss": 0.18, "step": 957000 }, { "epoch": 10.32, "eval_loss": 0.17065568268299103, "eval_runtime": 2.6326, "eval_samples_per_second": 872.521, "eval_steps_per_second": 13.675, "step": 957000 }, { "epoch": 10.32, "learning_rate": 1.0690213486584175e-05, "loss": 0.18, "step": 957500 }, { "epoch": 10.33, "learning_rate": 1.0674094663351906e-05, "loss": 0.18, "step": 958000 }, { "epoch": 10.33, "eval_loss": 0.1698225736618042, "eval_runtime": 2.6744, "eval_samples_per_second": 858.898, "eval_steps_per_second": 13.461, "step": 958000 }, { "epoch": 10.33, "learning_rate": 1.0658165374497611e-05, "loss": 0.1804, "step": 958500 }, { "epoch": 10.34, "learning_rate": 1.0642425663571383e-05, "loss": 0.1802, "step": 959000 }, { "epoch": 10.34, "eval_loss": 0.17188780009746552, "eval_runtime": 2.6352, "eval_samples_per_second": 871.663, "eval_steps_per_second": 13.661, "step": 959000 }, { "epoch": 10.34, "learning_rate": 1.062687557360497e-05, "loss": 0.1802, "step": 959500 }, { "epoch": 10.35, "learning_rate": 1.0611515147111736e-05, "loss": 0.1802, "step": 960000 }, { "epoch": 10.35, "eval_loss": 0.16846837103366852, "eval_runtime": 2.7425, "eval_samples_per_second": 837.549, "eval_steps_per_second": 13.127, "step": 960000 }, { "epoch": 10.36, "learning_rate": 1.0596344426086501e-05, "loss": 0.1798, "step": 960500 }, { "epoch": 10.36, "learning_rate": 1.0581363452005424e-05, "loss": 0.1805, "step": 961000 }, { "epoch": 10.36, "eval_loss": 0.17073865234851837, "eval_runtime": 2.5938, "eval_samples_per_second": 885.585, "eval_steps_per_second": 13.879, "step": 961000 }, { "epoch": 10.37, "learning_rate": 1.0566572265825932e-05, "loss": 0.18, "step": 961500 }, { "epoch": 10.37, "learning_rate": 1.0551970907986557e-05, "loss": 0.1801, "step": 962000 }, { "epoch": 10.37, "eval_loss": 0.17134888470172882, "eval_runtime": 2.5813, "eval_samples_per_second": 889.852, "eval_steps_per_second": 13.946, "step": 962000 }, { "epoch": 10.38, "learning_rate": 1.0537559418406849e-05, "loss": 0.18, "step": 962500 }, { "epoch": 10.38, "learning_rate": 1.0523337836487271e-05, "loss": 0.1799, "step": 963000 }, { "epoch": 10.38, "eval_loss": 0.17050015926361084, "eval_runtime": 2.6391, "eval_samples_per_second": 870.378, "eval_steps_per_second": 13.641, "step": 963000 }, { "epoch": 10.39, "learning_rate": 1.0509306201109092e-05, "loss": 0.1801, "step": 963500 }, { "epoch": 10.39, "learning_rate": 1.0495464550634267e-05, "loss": 0.18, "step": 964000 }, { "epoch": 10.39, "eval_loss": 0.17047521471977234, "eval_runtime": 2.6548, "eval_samples_per_second": 865.215, "eval_steps_per_second": 13.56, "step": 964000 }, { "epoch": 10.4, "learning_rate": 1.0481812922905339e-05, "loss": 0.1805, "step": 964500 }, { "epoch": 10.41, "learning_rate": 1.046835135524533e-05, "loss": 0.1798, "step": 965000 }, { "epoch": 10.41, "eval_loss": 0.17172271013259888, "eval_runtime": 2.5812, "eval_samples_per_second": 889.895, "eval_steps_per_second": 13.947, "step": 965000 }, { "epoch": 10.41, "learning_rate": 1.0455079884457653e-05, "loss": 0.1801, "step": 965500 }, { "epoch": 10.42, "learning_rate": 1.044199854682601e-05, "loss": 0.1797, "step": 966000 }, { "epoch": 10.42, "eval_loss": 0.16956347227096558, "eval_runtime": 2.6699, "eval_samples_per_second": 860.34, "eval_steps_per_second": 13.484, "step": 966000 }, { "epoch": 10.42, "learning_rate": 1.0429107378114277e-05, "loss": 0.1802, "step": 966500 }, { "epoch": 10.43, "learning_rate": 1.0416406413566414e-05, "loss": 0.1802, "step": 967000 }, { "epoch": 10.43, "eval_loss": 0.17151953279972076, "eval_runtime": 2.5629, "eval_samples_per_second": 896.252, "eval_steps_per_second": 14.047, "step": 967000 }, { "epoch": 10.43, "learning_rate": 1.0403895687906366e-05, "loss": 0.1803, "step": 967500 }, { "epoch": 10.44, "learning_rate": 1.0391575235337991e-05, "loss": 0.1798, "step": 968000 }, { "epoch": 10.44, "eval_loss": 0.17273712158203125, "eval_runtime": 2.6967, "eval_samples_per_second": 851.792, "eval_steps_per_second": 13.35, "step": 968000 }, { "epoch": 10.44, "learning_rate": 1.0379445089544929e-05, "loss": 0.1799, "step": 968500 }, { "epoch": 10.45, "learning_rate": 1.0367505283690547e-05, "loss": 0.1797, "step": 969000 }, { "epoch": 10.45, "eval_loss": 0.17085492610931396, "eval_runtime": 2.6519, "eval_samples_per_second": 866.18, "eval_steps_per_second": 13.575, "step": 969000 }, { "epoch": 10.46, "learning_rate": 1.0355755850417803e-05, "loss": 0.1797, "step": 969500 }, { "epoch": 10.46, "learning_rate": 1.0344196821849202e-05, "loss": 0.1799, "step": 970000 }, { "epoch": 10.46, "eval_loss": 0.1711302548646927, "eval_runtime": 2.5979, "eval_samples_per_second": 884.178, "eval_steps_per_second": 13.857, "step": 970000 }, { "epoch": 10.82, "learning_rate": 1.0332828229586692e-05, "loss": 0.1799, "step": 970500 }, { "epoch": 10.83, "learning_rate": 1.032165010471157e-05, "loss": 0.1796, "step": 971000 }, { "epoch": 10.83, "eval_loss": 0.17119638621807098, "eval_runtime": 2.5911, "eval_samples_per_second": 886.512, "eval_steps_per_second": 13.894, "step": 971000 }, { "epoch": 10.84, "learning_rate": 1.0310662477784401e-05, "loss": 0.1804, "step": 971500 }, { "epoch": 10.84, "learning_rate": 1.0299865378844936e-05, "loss": 0.1798, "step": 972000 }, { "epoch": 10.84, "eval_loss": 0.1710081547498703, "eval_runtime": 2.5437, "eval_samples_per_second": 903.014, "eval_steps_per_second": 14.153, "step": 972000 }, { "epoch": 10.85, "learning_rate": 1.028925883741203e-05, "loss": 0.18, "step": 972500 }, { "epoch": 10.85, "learning_rate": 1.0278842882483569e-05, "loss": 0.1797, "step": 973000 }, { "epoch": 10.85, "eval_loss": 0.17146818339824677, "eval_runtime": 2.5692, "eval_samples_per_second": 894.045, "eval_steps_per_second": 14.012, "step": 973000 }, { "epoch": 10.86, "learning_rate": 1.026861754253637e-05, "loss": 0.1796, "step": 973500 }, { "epoch": 10.86, "learning_rate": 1.025858284552612e-05, "loss": 0.1797, "step": 974000 }, { "epoch": 10.86, "eval_loss": 0.1706797480583191, "eval_runtime": 2.6865, "eval_samples_per_second": 855.008, "eval_steps_per_second": 13.4, "step": 974000 }, { "epoch": 10.87, "learning_rate": 1.0248738818887307e-05, "loss": 0.1799, "step": 974500 }, { "epoch": 10.87, "learning_rate": 1.023908548953311e-05, "loss": 0.1799, "step": 975000 }, { "epoch": 10.87, "eval_loss": 0.1708817481994629, "eval_runtime": 2.5759, "eval_samples_per_second": 891.738, "eval_steps_per_second": 13.976, "step": 975000 }, { "epoch": 10.88, "learning_rate": 1.0229622883855378e-05, "loss": 0.1798, "step": 975500 }, { "epoch": 10.89, "learning_rate": 1.02203510277245e-05, "loss": 0.1796, "step": 976000 }, { "epoch": 10.89, "eval_loss": 0.1709393560886383, "eval_runtime": 2.6094, "eval_samples_per_second": 880.296, "eval_steps_per_second": 13.797, "step": 976000 }, { "epoch": 10.89, "learning_rate": 1.021126994648939e-05, "loss": 0.1801, "step": 976500 }, { "epoch": 10.9, "learning_rate": 1.0202379664977364e-05, "loss": 0.1799, "step": 977000 }, { "epoch": 10.9, "eval_loss": 0.17174768447875977, "eval_runtime": 2.6289, "eval_samples_per_second": 873.739, "eval_steps_per_second": 13.694, "step": 977000 }, { "epoch": 10.9, "learning_rate": 1.019368020749412e-05, "loss": 0.1797, "step": 977500 }, { "epoch": 10.91, "learning_rate": 1.018517159782365e-05, "loss": 0.1797, "step": 978000 }, { "epoch": 10.91, "eval_loss": 0.16800174117088318, "eval_runtime": 2.57, "eval_samples_per_second": 893.767, "eval_steps_per_second": 14.008, "step": 978000 }, { "epoch": 10.91, "learning_rate": 1.0176853859228149e-05, "loss": 0.1794, "step": 978500 }, { "epoch": 10.92, "learning_rate": 1.0168727014448004e-05, "loss": 0.1794, "step": 979000 }, { "epoch": 10.92, "eval_loss": 0.16953879594802856, "eval_runtime": 2.6173, "eval_samples_per_second": 877.629, "eval_steps_per_second": 13.755, "step": 979000 }, { "epoch": 10.92, "learning_rate": 1.0160791085701714e-05, "loss": 0.1798, "step": 979500 }, { "epoch": 10.93, "learning_rate": 1.0153046094685783e-05, "loss": 0.1794, "step": 980000 }, { "epoch": 10.93, "eval_loss": 0.1709355264902115, "eval_runtime": 2.587, "eval_samples_per_second": 887.915, "eval_steps_per_second": 13.916, "step": 980000 } ], "max_steps": 1000000, "num_train_epochs": 12, "total_flos": 6.869770816498864e+22, "trial_name": null, "trial_params": null }