{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.275652862041678, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 8.333333333333334e-06, "loss": 6.6159, "step": 1000 }, { "epoch": 0.05, "eval_loss": 5.667357444763184, "eval_runtime": 77.1533, "eval_samples_per_second": 53.413, "eval_steps_per_second": 3.344, "step": 1000 }, { "epoch": 0.11, "learning_rate": 1.6666666666666667e-05, "loss": 3.8828, "step": 2000 }, { "epoch": 0.11, "eval_loss": 5.533279895782471, "eval_runtime": 76.0378, "eval_samples_per_second": 54.197, "eval_steps_per_second": 3.393, "step": 2000 }, { "epoch": 0.16, "learning_rate": 2.5e-05, "loss": 3.571, "step": 3000 }, { "epoch": 0.16, "eval_loss": 5.119894504547119, "eval_runtime": 76.0046, "eval_samples_per_second": 54.22, "eval_steps_per_second": 3.395, "step": 3000 }, { "epoch": 0.21, "learning_rate": 3.3333333333333335e-05, "loss": 3.4645, "step": 4000 }, { "epoch": 0.21, "eval_loss": 5.038773536682129, "eval_runtime": 75.6611, "eval_samples_per_second": 54.467, "eval_steps_per_second": 3.41, "step": 4000 }, { "epoch": 0.26, "learning_rate": 4.166666666666667e-05, "loss": 3.3826, "step": 5000 }, { "epoch": 0.26, "eval_loss": 4.901891708374023, "eval_runtime": 76.8157, "eval_samples_per_second": 53.648, "eval_steps_per_second": 3.359, "step": 5000 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 3.3503, "step": 6000 }, { "epoch": 0.32, "eval_loss": 5.015623092651367, "eval_runtime": 76.5299, "eval_samples_per_second": 53.848, "eval_steps_per_second": 3.371, "step": 6000 }, { "epoch": 0.37, "learning_rate": 4.9727594660855354e-05, "loss": 3.3038, "step": 7000 }, { "epoch": 0.37, "eval_loss": 4.954394340515137, "eval_runtime": 76.8379, "eval_samples_per_second": 53.632, "eval_steps_per_second": 3.358, "step": 7000 }, { "epoch": 0.42, "learning_rate": 4.9455189321710707e-05, "loss": 3.2411, "step": 8000 }, { "epoch": 0.42, "eval_loss": 4.890042304992676, "eval_runtime": 76.8834, "eval_samples_per_second": 53.601, "eval_steps_per_second": 3.356, "step": 8000 }, { "epoch": 0.47, "learning_rate": 4.918278398256606e-05, "loss": 3.1958, "step": 9000 }, { "epoch": 0.47, "eval_loss": 5.015548229217529, "eval_runtime": 76.9729, "eval_samples_per_second": 53.538, "eval_steps_per_second": 3.352, "step": 9000 }, { "epoch": 0.53, "learning_rate": 4.891037864342141e-05, "loss": 3.1856, "step": 10000 }, { "epoch": 0.53, "eval_loss": 4.8613457679748535, "eval_runtime": 76.9767, "eval_samples_per_second": 53.536, "eval_steps_per_second": 3.352, "step": 10000 }, { "epoch": 0.58, "learning_rate": 4.863797330427677e-05, "loss": 3.1836, "step": 11000 }, { "epoch": 0.58, "eval_loss": 4.832852840423584, "eval_runtime": 76.9463, "eval_samples_per_second": 53.557, "eval_steps_per_second": 3.353, "step": 11000 }, { "epoch": 0.63, "learning_rate": 4.836556796513212e-05, "loss": 3.1566, "step": 12000 }, { "epoch": 0.63, "eval_loss": 4.755970001220703, "eval_runtime": 76.8635, "eval_samples_per_second": 53.615, "eval_steps_per_second": 3.357, "step": 12000 }, { "epoch": 0.69, "learning_rate": 4.809316262598747e-05, "loss": 3.1046, "step": 13000 }, { "epoch": 0.69, "eval_loss": 4.7649054527282715, "eval_runtime": 76.531, "eval_samples_per_second": 53.847, "eval_steps_per_second": 3.371, "step": 13000 }, { "epoch": 0.74, "learning_rate": 4.7820757286842826e-05, "loss": 3.109, "step": 14000 }, { "epoch": 0.74, "eval_loss": 4.723949909210205, "eval_runtime": 76.9877, "eval_samples_per_second": 53.528, "eval_steps_per_second": 3.351, "step": 14000 }, { "epoch": 0.79, "learning_rate": 4.754835194769818e-05, "loss": 3.0862, "step": 15000 }, { "epoch": 0.79, "eval_loss": 4.741213321685791, "eval_runtime": 77.0649, "eval_samples_per_second": 53.474, "eval_steps_per_second": 3.348, "step": 15000 }, { "epoch": 0.84, "learning_rate": 4.727594660855353e-05, "loss": 3.0713, "step": 16000 }, { "epoch": 0.84, "eval_loss": 4.756562232971191, "eval_runtime": 72.732, "eval_samples_per_second": 56.66, "eval_steps_per_second": 3.547, "step": 16000 }, { "epoch": 0.9, "learning_rate": 4.700354126940888e-05, "loss": 3.052, "step": 17000 }, { "epoch": 0.9, "eval_loss": 4.908087730407715, "eval_runtime": 71.9667, "eval_samples_per_second": 57.263, "eval_steps_per_second": 3.585, "step": 17000 }, { "epoch": 0.95, "learning_rate": 4.6731135930264234e-05, "loss": 3.0546, "step": 18000 }, { "epoch": 0.95, "eval_loss": 4.8161492347717285, "eval_runtime": 72.0137, "eval_samples_per_second": 57.225, "eval_steps_per_second": 3.583, "step": 18000 }, { "epoch": 1.0, "learning_rate": 4.6458730591119586e-05, "loss": 3.0308, "step": 19000 }, { "epoch": 1.0, "eval_loss": 4.948371410369873, "eval_runtime": 72.0349, "eval_samples_per_second": 57.208, "eval_steps_per_second": 3.582, "step": 19000 }, { "epoch": 1.06, "learning_rate": 4.6186325251974945e-05, "loss": 2.864, "step": 20000 }, { "epoch": 1.06, "eval_loss": 4.853775978088379, "eval_runtime": 72.0568, "eval_samples_per_second": 57.191, "eval_steps_per_second": 3.581, "step": 20000 }, { "epoch": 1.11, "learning_rate": 4.59139199128303e-05, "loss": 2.8339, "step": 21000 }, { "epoch": 1.11, "eval_loss": 4.888705730438232, "eval_runtime": 72.0694, "eval_samples_per_second": 57.181, "eval_steps_per_second": 3.58, "step": 21000 }, { "epoch": 1.16, "learning_rate": 4.564151457368564e-05, "loss": 2.8388, "step": 22000 }, { "epoch": 1.16, "eval_loss": 4.920900344848633, "eval_runtime": 72.0696, "eval_samples_per_second": 57.181, "eval_steps_per_second": 3.58, "step": 22000 }, { "epoch": 1.21, "learning_rate": 4.5369109234541e-05, "loss": 2.852, "step": 23000 }, { "epoch": 1.21, "eval_loss": 4.990002632141113, "eval_runtime": 72.0496, "eval_samples_per_second": 57.197, "eval_steps_per_second": 3.581, "step": 23000 }, { "epoch": 1.27, "learning_rate": 4.509670389539635e-05, "loss": 2.8415, "step": 24000 }, { "epoch": 1.27, "eval_loss": 4.912019729614258, "eval_runtime": 72.0569, "eval_samples_per_second": 57.191, "eval_steps_per_second": 3.581, "step": 24000 }, { "epoch": 1.32, "learning_rate": 4.4824298556251705e-05, "loss": 2.8435, "step": 25000 }, { "epoch": 1.32, "eval_loss": 4.905246257781982, "eval_runtime": 72.0554, "eval_samples_per_second": 57.192, "eval_steps_per_second": 3.581, "step": 25000 }, { "epoch": 1.37, "learning_rate": 4.455189321710706e-05, "loss": 2.832, "step": 26000 }, { "epoch": 1.37, "eval_loss": 5.011730194091797, "eval_runtime": 72.047, "eval_samples_per_second": 57.199, "eval_steps_per_second": 3.581, "step": 26000 }, { "epoch": 1.42, "learning_rate": 4.427948787796241e-05, "loss": 2.8509, "step": 27000 }, { "epoch": 1.42, "eval_loss": 5.078315734863281, "eval_runtime": 72.0913, "eval_samples_per_second": 57.164, "eval_steps_per_second": 3.579, "step": 27000 }, { "epoch": 1.48, "learning_rate": 4.400708253881776e-05, "loss": 2.8436, "step": 28000 }, { "epoch": 1.48, "eval_loss": 4.865673542022705, "eval_runtime": 72.0906, "eval_samples_per_second": 57.164, "eval_steps_per_second": 3.579, "step": 28000 }, { "epoch": 1.53, "learning_rate": 4.373467719967311e-05, "loss": 2.8558, "step": 29000 }, { "epoch": 1.53, "eval_loss": 4.83981990814209, "eval_runtime": 72.0594, "eval_samples_per_second": 57.189, "eval_steps_per_second": 3.58, "step": 29000 }, { "epoch": 1.58, "learning_rate": 4.346227186052847e-05, "loss": 2.8324, "step": 30000 }, { "epoch": 1.58, "eval_loss": 4.96191930770874, "eval_runtime": 72.0431, "eval_samples_per_second": 57.202, "eval_steps_per_second": 3.581, "step": 30000 }, { "epoch": 1.64, "learning_rate": 4.318986652138382e-05, "loss": 2.8234, "step": 31000 }, { "epoch": 1.64, "eval_loss": 4.830244541168213, "eval_runtime": 72.1429, "eval_samples_per_second": 57.123, "eval_steps_per_second": 3.576, "step": 31000 }, { "epoch": 1.69, "learning_rate": 4.291746118223917e-05, "loss": 2.8155, "step": 32000 }, { "epoch": 1.69, "eval_loss": 4.835384845733643, "eval_runtime": 72.0429, "eval_samples_per_second": 57.202, "eval_steps_per_second": 3.581, "step": 32000 }, { "epoch": 1.74, "learning_rate": 4.264505584309453e-05, "loss": 2.8422, "step": 33000 }, { "epoch": 1.74, "eval_loss": 5.029502868652344, "eval_runtime": 72.0029, "eval_samples_per_second": 57.234, "eval_steps_per_second": 3.583, "step": 33000 }, { "epoch": 1.79, "learning_rate": 4.237265050394988e-05, "loss": 2.8192, "step": 34000 }, { "epoch": 1.79, "eval_loss": 5.054713249206543, "eval_runtime": 72.0962, "eval_samples_per_second": 57.16, "eval_steps_per_second": 3.579, "step": 34000 }, { "epoch": 1.85, "learning_rate": 4.210024516480523e-05, "loss": 2.839, "step": 35000 }, { "epoch": 1.85, "eval_loss": 5.000535488128662, "eval_runtime": 72.0721, "eval_samples_per_second": 57.179, "eval_steps_per_second": 3.58, "step": 35000 }, { "epoch": 1.9, "learning_rate": 4.1827839825660584e-05, "loss": 2.803, "step": 36000 }, { "epoch": 1.9, "eval_loss": 4.954083442687988, "eval_runtime": 72.0788, "eval_samples_per_second": 57.174, "eval_steps_per_second": 3.579, "step": 36000 }, { "epoch": 1.95, "learning_rate": 4.1555434486515936e-05, "loss": 2.8096, "step": 37000 }, { "epoch": 1.95, "eval_loss": 5.285138130187988, "eval_runtime": 72.0675, "eval_samples_per_second": 57.182, "eval_steps_per_second": 3.58, "step": 37000 }, { "epoch": 2.0, "learning_rate": 4.128302914737129e-05, "loss": 2.7969, "step": 38000 }, { "epoch": 2.0, "eval_loss": 5.253526210784912, "eval_runtime": 72.067, "eval_samples_per_second": 57.183, "eval_steps_per_second": 3.58, "step": 38000 }, { "epoch": 2.06, "learning_rate": 4.101062380822665e-05, "loss": 2.61, "step": 39000 }, { "epoch": 2.06, "eval_loss": 5.254916667938232, "eval_runtime": 72.0845, "eval_samples_per_second": 57.169, "eval_steps_per_second": 3.579, "step": 39000 }, { "epoch": 2.11, "learning_rate": 4.073821846908199e-05, "loss": 2.6062, "step": 40000 }, { "epoch": 2.11, "eval_loss": 5.293761730194092, "eval_runtime": 72.1279, "eval_samples_per_second": 57.135, "eval_steps_per_second": 3.577, "step": 40000 }, { "epoch": 2.16, "learning_rate": 4.0465813129937345e-05, "loss": 2.6025, "step": 41000 }, { "epoch": 2.16, "eval_loss": 5.301900863647461, "eval_runtime": 77.2131, "eval_samples_per_second": 53.372, "eval_steps_per_second": 3.341, "step": 41000 }, { "epoch": 2.22, "learning_rate": 4.0193407790792704e-05, "loss": 2.5983, "step": 42000 }, { "epoch": 2.22, "eval_loss": 5.322200775146484, "eval_runtime": 76.5314, "eval_samples_per_second": 53.847, "eval_steps_per_second": 3.371, "step": 42000 }, { "epoch": 2.27, "learning_rate": 3.9921002451648056e-05, "loss": 2.6204, "step": 43000 }, { "epoch": 2.27, "eval_loss": 4.960803031921387, "eval_runtime": 76.6466, "eval_samples_per_second": 53.766, "eval_steps_per_second": 3.366, "step": 43000 }, { "epoch": 2.32, "learning_rate": 3.964859711250341e-05, "loss": 2.6199, "step": 44000 }, { "epoch": 2.32, "eval_loss": 5.222067832946777, "eval_runtime": 76.1563, "eval_samples_per_second": 54.112, "eval_steps_per_second": 3.388, "step": 44000 }, { "epoch": 2.37, "learning_rate": 3.937619177335876e-05, "loss": 2.6125, "step": 45000 }, { "epoch": 2.37, "eval_loss": 5.104248046875, "eval_runtime": 76.1947, "eval_samples_per_second": 54.085, "eval_steps_per_second": 3.386, "step": 45000 }, { "epoch": 2.43, "learning_rate": 3.910378643421411e-05, "loss": 2.621, "step": 46000 }, { "epoch": 2.43, "eval_loss": 5.0914106369018555, "eval_runtime": 76.1296, "eval_samples_per_second": 54.131, "eval_steps_per_second": 3.389, "step": 46000 }, { "epoch": 2.48, "learning_rate": 3.8831381095069464e-05, "loss": 2.6215, "step": 47000 }, { "epoch": 2.48, "eval_loss": 5.066018104553223, "eval_runtime": 76.0097, "eval_samples_per_second": 54.217, "eval_steps_per_second": 3.394, "step": 47000 }, { "epoch": 2.53, "learning_rate": 3.855897575592482e-05, "loss": 2.6388, "step": 48000 }, { "epoch": 2.53, "eval_loss": 5.130795001983643, "eval_runtime": 76.0566, "eval_samples_per_second": 54.183, "eval_steps_per_second": 3.392, "step": 48000 }, { "epoch": 2.59, "learning_rate": 3.828657041678017e-05, "loss": 2.6328, "step": 49000 }, { "epoch": 2.59, "eval_loss": 5.16806173324585, "eval_runtime": 76.22, "eval_samples_per_second": 54.067, "eval_steps_per_second": 3.385, "step": 49000 }, { "epoch": 2.64, "learning_rate": 3.801416507763552e-05, "loss": 2.6462, "step": 50000 }, { "epoch": 2.64, "eval_loss": 5.09861946105957, "eval_runtime": 81.8993, "eval_samples_per_second": 50.965, "eval_steps_per_second": 3.187, "step": 50000 }, { "epoch": 2.69, "learning_rate": 3.774175973849088e-05, "loss": 2.6489, "step": 51000 }, { "epoch": 2.69, "eval_loss": 5.686895847320557, "eval_runtime": 81.88, "eval_samples_per_second": 50.977, "eval_steps_per_second": 3.188, "step": 51000 }, { "epoch": 2.74, "learning_rate": 3.746935439934623e-05, "loss": 2.6487, "step": 52000 }, { "epoch": 2.74, "eval_loss": 5.235321998596191, "eval_runtime": 81.927, "eval_samples_per_second": 50.948, "eval_steps_per_second": 3.186, "step": 52000 }, { "epoch": 2.8, "learning_rate": 3.719694906020158e-05, "loss": 2.6291, "step": 53000 }, { "epoch": 2.8, "eval_loss": 5.161744594573975, "eval_runtime": 81.9025, "eval_samples_per_second": 50.963, "eval_steps_per_second": 3.187, "step": 53000 }, { "epoch": 2.85, "learning_rate": 3.6924543721056935e-05, "loss": 2.6168, "step": 54000 }, { "epoch": 2.85, "eval_loss": 5.017256259918213, "eval_runtime": 81.8942, "eval_samples_per_second": 50.968, "eval_steps_per_second": 3.187, "step": 54000 }, { "epoch": 2.9, "learning_rate": 3.665213838191229e-05, "loss": 2.6478, "step": 55000 }, { "epoch": 2.9, "eval_loss": 4.907939434051514, "eval_runtime": 81.992, "eval_samples_per_second": 50.907, "eval_steps_per_second": 3.183, "step": 55000 }, { "epoch": 2.95, "learning_rate": 3.637973304276764e-05, "loss": 2.636, "step": 56000 }, { "epoch": 2.95, "eval_loss": 5.104933261871338, "eval_runtime": 81.9752, "eval_samples_per_second": 50.918, "eval_steps_per_second": 3.184, "step": 56000 }, { "epoch": 3.01, "learning_rate": 3.6107327703623e-05, "loss": 2.6014, "step": 57000 }, { "epoch": 3.01, "eval_loss": 5.3848161697387695, "eval_runtime": 80.9116, "eval_samples_per_second": 51.587, "eval_steps_per_second": 3.226, "step": 57000 }, { "epoch": 3.06, "learning_rate": 3.583492236447834e-05, "loss": 2.4145, "step": 58000 }, { "epoch": 3.06, "eval_loss": 5.649404048919678, "eval_runtime": 78.1589, "eval_samples_per_second": 53.404, "eval_steps_per_second": 3.339, "step": 58000 }, { "epoch": 3.11, "learning_rate": 3.5562517025333695e-05, "loss": 2.4236, "step": 59000 }, { "epoch": 3.11, "eval_loss": 5.877135276794434, "eval_runtime": 78.2014, "eval_samples_per_second": 53.375, "eval_steps_per_second": 3.338, "step": 59000 }, { "epoch": 3.17, "learning_rate": 3.5290111686189054e-05, "loss": 2.448, "step": 60000 }, { "epoch": 3.17, "eval_loss": 5.5254974365234375, "eval_runtime": 78.286, "eval_samples_per_second": 53.317, "eval_steps_per_second": 3.334, "step": 60000 }, { "epoch": 3.22, "learning_rate": 3.5017706347044406e-05, "loss": 2.4516, "step": 61000 }, { "epoch": 3.22, "eval_loss": 5.600991249084473, "eval_runtime": 78.1753, "eval_samples_per_second": 53.393, "eval_steps_per_second": 3.339, "step": 61000 }, { "epoch": 3.27, "learning_rate": 3.474530100789976e-05, "loss": 2.4536, "step": 62000 }, { "epoch": 3.27, "eval_loss": 5.542355537414551, "eval_runtime": 78.3728, "eval_samples_per_second": 53.258, "eval_steps_per_second": 3.33, "step": 62000 }, { "epoch": 3.32, "learning_rate": 3.447289566875511e-05, "loss": 2.4595, "step": 63000 }, { "epoch": 3.32, "eval_loss": 5.826052188873291, "eval_runtime": 78.1666, "eval_samples_per_second": 53.399, "eval_steps_per_second": 3.339, "step": 63000 }, { "epoch": 3.38, "learning_rate": 3.420049032961046e-05, "loss": 2.4539, "step": 64000 }, { "epoch": 3.38, "eval_loss": 5.755754470825195, "eval_runtime": 78.124, "eval_samples_per_second": 53.428, "eval_steps_per_second": 3.341, "step": 64000 }, { "epoch": 3.43, "learning_rate": 3.3928084990465814e-05, "loss": 2.4511, "step": 65000 }, { "epoch": 3.43, "eval_loss": 5.6875505447387695, "eval_runtime": 78.2257, "eval_samples_per_second": 53.358, "eval_steps_per_second": 3.336, "step": 65000 }, { "epoch": 3.48, "learning_rate": 3.3655679651321166e-05, "loss": 2.463, "step": 66000 }, { "epoch": 3.48, "eval_loss": 5.699981689453125, "eval_runtime": 78.274, "eval_samples_per_second": 53.325, "eval_steps_per_second": 3.334, "step": 66000 }, { "epoch": 3.53, "learning_rate": 3.338327431217652e-05, "loss": 2.4678, "step": 67000 }, { "epoch": 3.53, "eval_loss": 5.762045383453369, "eval_runtime": 78.2031, "eval_samples_per_second": 53.374, "eval_steps_per_second": 3.337, "step": 67000 }, { "epoch": 3.59, "learning_rate": 3.311086897303187e-05, "loss": 2.4753, "step": 68000 }, { "epoch": 3.59, "eval_loss": 5.77708101272583, "eval_runtime": 78.2487, "eval_samples_per_second": 53.343, "eval_steps_per_second": 3.336, "step": 68000 }, { "epoch": 3.64, "learning_rate": 3.283846363388722e-05, "loss": 2.4713, "step": 69000 }, { "epoch": 3.64, "eval_loss": 5.61986780166626, "eval_runtime": 78.1013, "eval_samples_per_second": 53.443, "eval_steps_per_second": 3.342, "step": 69000 }, { "epoch": 3.69, "learning_rate": 3.256605829474258e-05, "loss": 2.5024, "step": 70000 }, { "epoch": 3.69, "eval_loss": 5.585144996643066, "eval_runtime": 78.2816, "eval_samples_per_second": 53.32, "eval_steps_per_second": 3.334, "step": 70000 }, { "epoch": 3.75, "learning_rate": 3.2293652955597933e-05, "loss": 2.4772, "step": 71000 }, { "epoch": 3.75, "eval_loss": 5.567023277282715, "eval_runtime": 78.3144, "eval_samples_per_second": 53.298, "eval_steps_per_second": 3.333, "step": 71000 }, { "epoch": 3.8, "learning_rate": 3.202124761645328e-05, "loss": 2.4989, "step": 72000 }, { "epoch": 3.8, "eval_loss": 5.388617038726807, "eval_runtime": 78.311, "eval_samples_per_second": 53.3, "eval_steps_per_second": 3.333, "step": 72000 }, { "epoch": 3.85, "learning_rate": 3.174884227730864e-05, "loss": 2.4908, "step": 73000 }, { "epoch": 3.85, "eval_loss": 5.46661901473999, "eval_runtime": 78.3042, "eval_samples_per_second": 53.305, "eval_steps_per_second": 3.333, "step": 73000 }, { "epoch": 3.9, "learning_rate": 3.147643693816399e-05, "loss": 2.4975, "step": 74000 }, { "epoch": 3.9, "eval_loss": 5.72676944732666, "eval_runtime": 78.4025, "eval_samples_per_second": 53.238, "eval_steps_per_second": 3.329, "step": 74000 }, { "epoch": 3.96, "learning_rate": 3.120403159901934e-05, "loss": 2.4983, "step": 75000 }, { "epoch": 3.96, "eval_loss": 5.200565814971924, "eval_runtime": 78.2961, "eval_samples_per_second": 53.31, "eval_steps_per_second": 3.333, "step": 75000 }, { "epoch": 4.01, "learning_rate": 3.0931626259874694e-05, "loss": 2.4595, "step": 76000 }, { "epoch": 4.01, "eval_loss": 5.285388946533203, "eval_runtime": 77.5634, "eval_samples_per_second": 53.814, "eval_steps_per_second": 3.365, "step": 76000 }, { "epoch": 4.06, "learning_rate": 3.0659220920730046e-05, "loss": 2.2848, "step": 77000 }, { "epoch": 4.06, "eval_loss": 5.081736087799072, "eval_runtime": 77.374, "eval_samples_per_second": 53.946, "eval_steps_per_second": 3.373, "step": 77000 }, { "epoch": 4.12, "learning_rate": 3.0386815581585398e-05, "loss": 2.3182, "step": 78000 }, { "epoch": 4.12, "eval_loss": 5.363966464996338, "eval_runtime": 77.4691, "eval_samples_per_second": 53.88, "eval_steps_per_second": 3.369, "step": 78000 }, { "epoch": 4.17, "learning_rate": 3.0114410242440753e-05, "loss": 2.2998, "step": 79000 }, { "epoch": 4.17, "eval_loss": 5.359724044799805, "eval_runtime": 77.9931, "eval_samples_per_second": 53.518, "eval_steps_per_second": 3.346, "step": 79000 }, { "epoch": 4.22, "learning_rate": 2.9842004903296105e-05, "loss": 2.3085, "step": 80000 }, { "epoch": 4.22, "eval_loss": 5.6501922607421875, "eval_runtime": 77.8375, "eval_samples_per_second": 53.625, "eval_steps_per_second": 3.353, "step": 80000 }, { "epoch": 4.27, "learning_rate": 2.9569599564151457e-05, "loss": 2.2716, "step": 81000 }, { "epoch": 4.27, "eval_loss": 5.682339668273926, "eval_runtime": 78.1512, "eval_samples_per_second": 53.409, "eval_steps_per_second": 3.34, "step": 81000 }, { "epoch": 4.33, "learning_rate": 2.9297194225006813e-05, "loss": 2.2716, "step": 82000 }, { "epoch": 4.33, "eval_loss": 5.677391052246094, "eval_runtime": 78.1619, "eval_samples_per_second": 53.402, "eval_steps_per_second": 3.339, "step": 82000 }, { "epoch": 4.38, "learning_rate": 2.9024788885862165e-05, "loss": 2.284, "step": 83000 }, { "epoch": 4.38, "eval_loss": 5.525763988494873, "eval_runtime": 78.1139, "eval_samples_per_second": 53.435, "eval_steps_per_second": 3.341, "step": 83000 }, { "epoch": 4.43, "learning_rate": 2.8752383546717514e-05, "loss": 2.2916, "step": 84000 }, { "epoch": 4.43, "eval_loss": 5.277717590332031, "eval_runtime": 78.1112, "eval_samples_per_second": 53.437, "eval_steps_per_second": 3.341, "step": 84000 }, { "epoch": 4.48, "learning_rate": 2.8479978207572872e-05, "loss": 2.2978, "step": 85000 }, { "epoch": 4.48, "eval_loss": 5.4305100440979, "eval_runtime": 78.0088, "eval_samples_per_second": 53.507, "eval_steps_per_second": 3.346, "step": 85000 }, { "epoch": 4.54, "learning_rate": 2.820757286842822e-05, "loss": 2.3053, "step": 86000 }, { "epoch": 4.54, "eval_loss": 5.21947717666626, "eval_runtime": 77.9992, "eval_samples_per_second": 53.513, "eval_steps_per_second": 3.346, "step": 86000 }, { "epoch": 4.59, "learning_rate": 2.7935167529283573e-05, "loss": 2.3057, "step": 87000 }, { "epoch": 4.59, "eval_loss": 5.573265075683594, "eval_runtime": 78.0097, "eval_samples_per_second": 53.506, "eval_steps_per_second": 3.346, "step": 87000 }, { "epoch": 4.64, "learning_rate": 2.766276219013893e-05, "loss": 2.321, "step": 88000 }, { "epoch": 4.64, "eval_loss": 5.401345252990723, "eval_runtime": 77.9848, "eval_samples_per_second": 53.523, "eval_steps_per_second": 3.347, "step": 88000 }, { "epoch": 4.7, "learning_rate": 2.739035685099428e-05, "loss": 2.3067, "step": 89000 }, { "epoch": 4.7, "eval_loss": 5.604618072509766, "eval_runtime": 77.7786, "eval_samples_per_second": 53.665, "eval_steps_per_second": 3.356, "step": 89000 }, { "epoch": 4.75, "learning_rate": 2.7117951511849633e-05, "loss": 2.317, "step": 90000 }, { "epoch": 4.75, "eval_loss": 5.823883533477783, "eval_runtime": 77.9289, "eval_samples_per_second": 53.562, "eval_steps_per_second": 3.349, "step": 90000 }, { "epoch": 4.8, "learning_rate": 2.6845546172704988e-05, "loss": 2.3356, "step": 91000 }, { "epoch": 4.8, "eval_loss": 5.789203643798828, "eval_runtime": 78.1796, "eval_samples_per_second": 53.39, "eval_steps_per_second": 3.338, "step": 91000 }, { "epoch": 4.85, "learning_rate": 2.657314083356034e-05, "loss": 2.3301, "step": 92000 }, { "epoch": 4.85, "eval_loss": 5.643290996551514, "eval_runtime": 78.4214, "eval_samples_per_second": 53.225, "eval_steps_per_second": 3.328, "step": 92000 }, { "epoch": 4.91, "learning_rate": 2.630073549441569e-05, "loss": 2.3475, "step": 93000 }, { "epoch": 4.91, "eval_loss": 5.742900848388672, "eval_runtime": 78.38, "eval_samples_per_second": 53.253, "eval_steps_per_second": 3.33, "step": 93000 }, { "epoch": 4.96, "learning_rate": 2.6028330155271048e-05, "loss": 2.3237, "step": 94000 }, { "epoch": 4.96, "eval_loss": 5.759471416473389, "eval_runtime": 78.3893, "eval_samples_per_second": 53.247, "eval_steps_per_second": 3.33, "step": 94000 }, { "epoch": 5.01, "learning_rate": 2.5755924816126396e-05, "loss": 2.2959, "step": 95000 }, { "epoch": 5.01, "eval_loss": 5.965908050537109, "eval_runtime": 78.3426, "eval_samples_per_second": 53.279, "eval_steps_per_second": 3.332, "step": 95000 }, { "epoch": 5.06, "learning_rate": 2.548351947698175e-05, "loss": 2.192, "step": 96000 }, { "epoch": 5.06, "eval_loss": 5.630726337432861, "eval_runtime": 78.3297, "eval_samples_per_second": 53.288, "eval_steps_per_second": 3.332, "step": 96000 }, { "epoch": 5.12, "learning_rate": 2.5211114137837104e-05, "loss": 2.1869, "step": 97000 }, { "epoch": 5.12, "eval_loss": 5.92326545715332, "eval_runtime": 78.461, "eval_samples_per_second": 53.198, "eval_steps_per_second": 3.326, "step": 97000 }, { "epoch": 5.17, "learning_rate": 2.4938708798692456e-05, "loss": 2.203, "step": 98000 }, { "epoch": 5.17, "eval_loss": 5.989378929138184, "eval_runtime": 78.6971, "eval_samples_per_second": 53.039, "eval_steps_per_second": 3.317, "step": 98000 }, { "epoch": 5.22, "learning_rate": 2.4666303459547808e-05, "loss": 2.2222, "step": 99000 }, { "epoch": 5.22, "eval_loss": 5.885297775268555, "eval_runtime": 78.4475, "eval_samples_per_second": 53.208, "eval_steps_per_second": 3.327, "step": 99000 }, { "epoch": 5.28, "learning_rate": 2.439389812040316e-05, "loss": 2.225, "step": 100000 }, { "epoch": 5.28, "eval_loss": 5.9232964515686035, "eval_runtime": 78.1906, "eval_samples_per_second": 53.382, "eval_steps_per_second": 3.338, "step": 100000 } ], "max_steps": 189550, "num_train_epochs": 10, "total_flos": 2.5166251732608e+16, "trial_name": null, "trial_params": null }