{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.893337455557273, "eval_steps": 200, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.0002981331673926571, "loss": 2.3038, "step": 200 }, { "epoch": 0.12, "eval_loss": 1.589147925376892, "eval_runtime": 2.1684, "eval_samples_per_second": 461.163, "eval_steps_per_second": 57.645, "step": 200 }, { "epoch": 0.25, "learning_rate": 0.00029439950217797134, "loss": 1.5398, "step": 400 }, { "epoch": 0.25, "eval_loss": 1.4633430242538452, "eval_runtime": 2.1071, "eval_samples_per_second": 474.58, "eval_steps_per_second": 59.323, "step": 400 }, { "epoch": 0.37, "learning_rate": 0.00029066583696328563, "loss": 1.4615, "step": 600 }, { "epoch": 0.37, "eval_loss": 1.4296711683273315, "eval_runtime": 2.4812, "eval_samples_per_second": 403.032, "eval_steps_per_second": 50.379, "step": 600 }, { "epoch": 0.49, "learning_rate": 0.0002869321717485998, "loss": 1.4244, "step": 800 }, { "epoch": 0.49, "eval_loss": 1.3793567419052124, "eval_runtime": 2.9698, "eval_samples_per_second": 336.72, "eval_steps_per_second": 42.09, "step": 800 }, { "epoch": 0.62, "learning_rate": 0.0002831985065339141, "loss": 1.3921, "step": 1000 }, { "epoch": 0.62, "eval_loss": 1.3315461874008179, "eval_runtime": 2.7793, "eval_samples_per_second": 359.806, "eval_steps_per_second": 44.976, "step": 1000 }, { "epoch": 0.74, "learning_rate": 0.00027946484131922836, "loss": 1.0958, "step": 1200 }, { "epoch": 0.74, "eval_loss": 0.7548955082893372, "eval_runtime": 2.7656, "eval_samples_per_second": 361.592, "eval_steps_per_second": 45.199, "step": 1200 }, { "epoch": 0.87, "learning_rate": 0.0002757311761045426, "loss": 0.6312, "step": 1400 }, { "epoch": 0.87, "eval_loss": 0.3990221917629242, "eval_runtime": 2.1502, "eval_samples_per_second": 465.071, "eval_steps_per_second": 58.134, "step": 1400 }, { "epoch": 0.99, "learning_rate": 0.00027199751088985685, "loss": 0.4093, "step": 1600 }, { "epoch": 0.99, "eval_loss": 0.26113563776016235, "eval_runtime": 3.5103, "eval_samples_per_second": 284.875, "eval_steps_per_second": 35.609, "step": 1600 }, { "epoch": 1.11, "learning_rate": 0.00026826384567517114, "loss": 0.2959, "step": 1800 }, { "epoch": 1.11, "eval_loss": 0.2783801257610321, "eval_runtime": 2.8441, "eval_samples_per_second": 351.603, "eval_steps_per_second": 43.95, "step": 1800 }, { "epoch": 1.24, "learning_rate": 0.00026453018046048533, "loss": 0.2589, "step": 2000 }, { "epoch": 1.24, "eval_loss": 0.20705343782901764, "eval_runtime": 2.7524, "eval_samples_per_second": 363.322, "eval_steps_per_second": 45.415, "step": 2000 }, { "epoch": 1.36, "learning_rate": 0.0002607965152457996, "loss": 0.2246, "step": 2200 }, { "epoch": 1.36, "eval_loss": 0.15551678836345673, "eval_runtime": 2.15, "eval_samples_per_second": 465.117, "eval_steps_per_second": 58.14, "step": 2200 }, { "epoch": 1.48, "learning_rate": 0.00025706285003111387, "loss": 0.1991, "step": 2400 }, { "epoch": 1.48, "eval_loss": 0.15825262665748596, "eval_runtime": 2.1344, "eval_samples_per_second": 468.515, "eval_steps_per_second": 58.564, "step": 2400 }, { "epoch": 1.61, "learning_rate": 0.0002533291848164281, "loss": 0.1784, "step": 2600 }, { "epoch": 1.61, "eval_loss": 0.12008943408727646, "eval_runtime": 2.1414, "eval_samples_per_second": 466.985, "eval_steps_per_second": 58.373, "step": 2600 }, { "epoch": 1.73, "learning_rate": 0.00024959551960174235, "loss": 0.1598, "step": 2800 }, { "epoch": 1.73, "eval_loss": 0.12511701881885529, "eval_runtime": 2.55, "eval_samples_per_second": 392.155, "eval_steps_per_second": 49.019, "step": 2800 }, { "epoch": 1.86, "learning_rate": 0.0002458618543870566, "loss": 0.164, "step": 3000 }, { "epoch": 1.86, "eval_loss": 0.11049681156873703, "eval_runtime": 2.9765, "eval_samples_per_second": 335.97, "eval_steps_per_second": 41.996, "step": 3000 }, { "epoch": 1.98, "learning_rate": 0.00024212818917237084, "loss": 0.1475, "step": 3200 }, { "epoch": 1.98, "eval_loss": 0.0954003781080246, "eval_runtime": 2.8437, "eval_samples_per_second": 351.659, "eval_steps_per_second": 43.957, "step": 3200 }, { "epoch": 2.1, "learning_rate": 0.0002383945239576851, "loss": 0.1388, "step": 3400 }, { "epoch": 2.1, "eval_loss": 0.10116879642009735, "eval_runtime": 2.7628, "eval_samples_per_second": 361.954, "eval_steps_per_second": 45.244, "step": 3400 }, { "epoch": 2.23, "learning_rate": 0.00023466085874299935, "loss": 0.1346, "step": 3600 }, { "epoch": 2.23, "eval_loss": 0.10693109035491943, "eval_runtime": 3.1697, "eval_samples_per_second": 315.491, "eval_steps_per_second": 39.436, "step": 3600 }, { "epoch": 2.35, "learning_rate": 0.00023092719352831362, "loss": 0.1232, "step": 3800 }, { "epoch": 2.35, "eval_loss": 0.09901304543018341, "eval_runtime": 2.1178, "eval_samples_per_second": 472.183, "eval_steps_per_second": 59.023, "step": 3800 }, { "epoch": 2.47, "learning_rate": 0.00022719352831362786, "loss": 0.1187, "step": 4000 }, { "epoch": 2.47, "eval_loss": 0.11418598890304565, "eval_runtime": 2.1348, "eval_samples_per_second": 468.423, "eval_steps_per_second": 58.553, "step": 4000 }, { "epoch": 2.6, "learning_rate": 0.0002234598630989421, "loss": 0.1133, "step": 4200 }, { "epoch": 2.6, "eval_loss": 0.0984039306640625, "eval_runtime": 2.1382, "eval_samples_per_second": 467.676, "eval_steps_per_second": 58.459, "step": 4200 }, { "epoch": 2.72, "learning_rate": 0.00021972619788425635, "loss": 0.1088, "step": 4400 }, { "epoch": 2.72, "eval_loss": 0.07466612011194229, "eval_runtime": 2.8862, "eval_samples_per_second": 346.477, "eval_steps_per_second": 43.31, "step": 4400 }, { "epoch": 2.84, "learning_rate": 0.00021599253266957062, "loss": 0.1025, "step": 4600 }, { "epoch": 2.84, "eval_loss": 0.1227998435497284, "eval_runtime": 2.8738, "eval_samples_per_second": 347.966, "eval_steps_per_second": 43.496, "step": 4600 }, { "epoch": 2.97, "learning_rate": 0.00021225886745488486, "loss": 0.0971, "step": 4800 }, { "epoch": 2.97, "eval_loss": 0.07324225455522537, "eval_runtime": 2.2831, "eval_samples_per_second": 437.994, "eval_steps_per_second": 54.749, "step": 4800 }, { "epoch": 3.09, "learning_rate": 0.00020852520224019913, "loss": 0.0853, "step": 5000 }, { "epoch": 3.09, "eval_loss": 0.07788190990686417, "eval_runtime": 2.1358, "eval_samples_per_second": 468.199, "eval_steps_per_second": 58.525, "step": 5000 }, { "epoch": 3.22, "learning_rate": 0.00020479153702551337, "loss": 0.0865, "step": 5200 }, { "epoch": 3.22, "eval_loss": 0.06575259566307068, "eval_runtime": 2.1474, "eval_samples_per_second": 465.679, "eval_steps_per_second": 58.21, "step": 5200 }, { "epoch": 3.34, "learning_rate": 0.0002010578718108276, "loss": 0.0768, "step": 5400 }, { "epoch": 3.34, "eval_loss": 0.08183684200048447, "eval_runtime": 2.1211, "eval_samples_per_second": 471.453, "eval_steps_per_second": 58.932, "step": 5400 }, { "epoch": 3.46, "learning_rate": 0.00019732420659614186, "loss": 0.0738, "step": 5600 }, { "epoch": 3.46, "eval_loss": 0.04662672430276871, "eval_runtime": 2.7913, "eval_samples_per_second": 358.253, "eval_steps_per_second": 44.782, "step": 5600 }, { "epoch": 3.59, "learning_rate": 0.0001935905413814561, "loss": 0.0622, "step": 5800 }, { "epoch": 3.59, "eval_loss": 0.0433196946978569, "eval_runtime": 3.1597, "eval_samples_per_second": 316.49, "eval_steps_per_second": 39.561, "step": 5800 }, { "epoch": 3.71, "learning_rate": 0.00018985687616677037, "loss": 0.0671, "step": 6000 }, { "epoch": 3.71, "eval_loss": 0.038382936269044876, "eval_runtime": 2.1009, "eval_samples_per_second": 475.976, "eval_steps_per_second": 59.497, "step": 6000 }, { "epoch": 3.83, "learning_rate": 0.0001861232109520846, "loss": 0.0545, "step": 6200 }, { "epoch": 3.83, "eval_loss": 0.04082392156124115, "eval_runtime": 2.1346, "eval_samples_per_second": 468.481, "eval_steps_per_second": 58.56, "step": 6200 }, { "epoch": 3.96, "learning_rate": 0.00018238954573739888, "loss": 0.0564, "step": 6400 }, { "epoch": 3.96, "eval_loss": 0.043197453022003174, "eval_runtime": 2.1169, "eval_samples_per_second": 472.389, "eval_steps_per_second": 59.049, "step": 6400 }, { "epoch": 4.08, "learning_rate": 0.0001786558805227131, "loss": 0.0523, "step": 6600 }, { "epoch": 4.08, "eval_loss": 0.03342806547880173, "eval_runtime": 2.4926, "eval_samples_per_second": 401.182, "eval_steps_per_second": 50.148, "step": 6600 }, { "epoch": 4.2, "learning_rate": 0.00017492221530802736, "loss": 0.0456, "step": 6800 }, { "epoch": 4.2, "eval_loss": 0.02744474820792675, "eval_runtime": 3.0, "eval_samples_per_second": 333.335, "eval_steps_per_second": 41.667, "step": 6800 }, { "epoch": 4.33, "learning_rate": 0.0001711885500933416, "loss": 0.0442, "step": 7000 }, { "epoch": 4.33, "eval_loss": 0.024560416117310524, "eval_runtime": 2.6752, "eval_samples_per_second": 373.806, "eval_steps_per_second": 46.726, "step": 7000 }, { "epoch": 4.45, "learning_rate": 0.00016745488487865588, "loss": 0.0383, "step": 7200 }, { "epoch": 4.45, "eval_loss": 0.018605533987283707, "eval_runtime": 2.1117, "eval_samples_per_second": 473.559, "eval_steps_per_second": 59.195, "step": 7200 }, { "epoch": 4.58, "learning_rate": 0.00016372121966397012, "loss": 0.0348, "step": 7400 }, { "epoch": 4.58, "eval_loss": 0.01473915483802557, "eval_runtime": 2.1223, "eval_samples_per_second": 471.193, "eval_steps_per_second": 58.899, "step": 7400 }, { "epoch": 4.7, "learning_rate": 0.0001599875544492844, "loss": 0.0299, "step": 7600 }, { "epoch": 4.7, "eval_loss": 0.025838036090135574, "eval_runtime": 2.1138, "eval_samples_per_second": 473.088, "eval_steps_per_second": 59.136, "step": 7600 }, { "epoch": 4.82, "learning_rate": 0.0001562538892345986, "loss": 0.0268, "step": 7800 }, { "epoch": 4.82, "eval_loss": 0.01688736118376255, "eval_runtime": 2.1658, "eval_samples_per_second": 461.718, "eval_steps_per_second": 57.715, "step": 7800 }, { "epoch": 4.95, "learning_rate": 0.00015252022401991287, "loss": 0.0272, "step": 8000 }, { "epoch": 4.95, "eval_loss": 0.020514091476798058, "eval_runtime": 2.1415, "eval_samples_per_second": 466.966, "eval_steps_per_second": 58.371, "step": 8000 }, { "epoch": 5.07, "learning_rate": 0.00014878655880522712, "loss": 0.0277, "step": 8200 }, { "epoch": 5.07, "eval_loss": 0.018993763253092766, "eval_runtime": 2.3074, "eval_samples_per_second": 433.383, "eval_steps_per_second": 54.173, "step": 8200 }, { "epoch": 5.19, "learning_rate": 0.00014505289359054139, "loss": 0.0253, "step": 8400 }, { "epoch": 5.19, "eval_loss": 0.0132982786744833, "eval_runtime": 2.7723, "eval_samples_per_second": 360.706, "eval_steps_per_second": 45.088, "step": 8400 }, { "epoch": 5.32, "learning_rate": 0.00014131922837585563, "loss": 0.0208, "step": 8600 }, { "epoch": 5.32, "eval_loss": 0.011603164486587048, "eval_runtime": 2.2147, "eval_samples_per_second": 451.518, "eval_steps_per_second": 56.44, "step": 8600 }, { "epoch": 5.44, "learning_rate": 0.00013758556316116987, "loss": 0.019, "step": 8800 }, { "epoch": 5.44, "eval_loss": 0.007933158427476883, "eval_runtime": 2.565, "eval_samples_per_second": 389.858, "eval_steps_per_second": 48.732, "step": 8800 }, { "epoch": 5.57, "learning_rate": 0.00013385189794648414, "loss": 0.0179, "step": 9000 }, { "epoch": 5.57, "eval_loss": 0.00808796752244234, "eval_runtime": 2.157, "eval_samples_per_second": 463.605, "eval_steps_per_second": 57.951, "step": 9000 }, { "epoch": 5.69, "learning_rate": 0.00013011823273179835, "loss": 0.0136, "step": 9200 }, { "epoch": 5.69, "eval_loss": 0.02137412503361702, "eval_runtime": 2.1642, "eval_samples_per_second": 462.06, "eval_steps_per_second": 57.758, "step": 9200 }, { "epoch": 5.81, "learning_rate": 0.00012638456751711262, "loss": 0.0196, "step": 9400 }, { "epoch": 5.81, "eval_loss": 0.009271830320358276, "eval_runtime": 2.7483, "eval_samples_per_second": 363.865, "eval_steps_per_second": 45.483, "step": 9400 }, { "epoch": 5.94, "learning_rate": 0.00012265090230242687, "loss": 0.015, "step": 9600 }, { "epoch": 5.94, "eval_loss": 0.011388062499463558, "eval_runtime": 3.1063, "eval_samples_per_second": 321.931, "eval_steps_per_second": 40.241, "step": 9600 }, { "epoch": 6.06, "learning_rate": 0.00011891723708774112, "loss": 0.0196, "step": 9800 }, { "epoch": 6.06, "eval_loss": 0.009324445389211178, "eval_runtime": 2.9695, "eval_samples_per_second": 336.759, "eval_steps_per_second": 42.095, "step": 9800 }, { "epoch": 6.18, "learning_rate": 0.00011518357187305538, "loss": 0.0192, "step": 10000 }, { "epoch": 6.18, "eval_loss": 0.008494062349200249, "eval_runtime": 2.1785, "eval_samples_per_second": 459.035, "eval_steps_per_second": 57.379, "step": 10000 }, { "epoch": 6.31, "learning_rate": 0.00011144990665836963, "loss": 0.0155, "step": 10200 }, { "epoch": 6.31, "eval_loss": 0.005131287965923548, "eval_runtime": 2.2151, "eval_samples_per_second": 451.441, "eval_steps_per_second": 56.43, "step": 10200 }, { "epoch": 6.43, "learning_rate": 0.00010771624144368388, "loss": 0.0182, "step": 10400 }, { "epoch": 6.43, "eval_loss": 0.01033452432602644, "eval_runtime": 2.204, "eval_samples_per_second": 453.729, "eval_steps_per_second": 56.716, "step": 10400 }, { "epoch": 6.55, "learning_rate": 0.00010398257622899813, "loss": 0.0149, "step": 10600 }, { "epoch": 6.55, "eval_loss": 0.006081216037273407, "eval_runtime": 2.6138, "eval_samples_per_second": 382.582, "eval_steps_per_second": 47.823, "step": 10600 }, { "epoch": 6.68, "learning_rate": 0.00010024891101431236, "loss": 0.0155, "step": 10800 }, { "epoch": 6.68, "eval_loss": 0.008235114626586437, "eval_runtime": 2.9799, "eval_samples_per_second": 335.587, "eval_steps_per_second": 41.948, "step": 10800 }, { "epoch": 6.8, "learning_rate": 9.651524579962662e-05, "loss": 0.0125, "step": 11000 }, { "epoch": 6.8, "eval_loss": 0.0061024767346680164, "eval_runtime": 3.1763, "eval_samples_per_second": 314.832, "eval_steps_per_second": 39.354, "step": 11000 }, { "epoch": 6.93, "learning_rate": 9.278158058494087e-05, "loss": 0.0126, "step": 11200 }, { "epoch": 6.93, "eval_loss": 0.0077368393540382385, "eval_runtime": 2.1677, "eval_samples_per_second": 461.31, "eval_steps_per_second": 57.664, "step": 11200 }, { "epoch": 7.05, "learning_rate": 8.904791537025512e-05, "loss": 0.016, "step": 11400 }, { "epoch": 7.05, "eval_loss": 0.01462015975266695, "eval_runtime": 2.163, "eval_samples_per_second": 462.313, "eval_steps_per_second": 57.789, "step": 11400 }, { "epoch": 7.17, "learning_rate": 8.531425015556937e-05, "loss": 0.0168, "step": 11600 }, { "epoch": 7.17, "eval_loss": 0.013114248402416706, "eval_runtime": 2.177, "eval_samples_per_second": 459.355, "eval_steps_per_second": 57.419, "step": 11600 }, { "epoch": 7.3, "learning_rate": 8.158058494088363e-05, "loss": 0.0115, "step": 11800 }, { "epoch": 7.3, "eval_loss": 0.0058467877097427845, "eval_runtime": 2.8432, "eval_samples_per_second": 351.72, "eval_steps_per_second": 43.965, "step": 11800 }, { "epoch": 7.42, "learning_rate": 7.784691972619787e-05, "loss": 0.0109, "step": 12000 }, { "epoch": 7.42, "eval_loss": 0.007328983396291733, "eval_runtime": 2.9781, "eval_samples_per_second": 335.785, "eval_steps_per_second": 41.973, "step": 12000 }, { "epoch": 7.54, "learning_rate": 7.411325451151213e-05, "loss": 0.01, "step": 12200 }, { "epoch": 7.54, "eval_loss": 0.00543447770178318, "eval_runtime": 2.13, "eval_samples_per_second": 469.492, "eval_steps_per_second": 58.686, "step": 12200 }, { "epoch": 7.67, "learning_rate": 7.037958929682637e-05, "loss": 0.0085, "step": 12400 }, { "epoch": 7.67, "eval_loss": 0.005294375587254763, "eval_runtime": 2.1484, "eval_samples_per_second": 465.459, "eval_steps_per_second": 58.182, "step": 12400 }, { "epoch": 7.79, "learning_rate": 6.664592408214062e-05, "loss": 0.0105, "step": 12600 }, { "epoch": 7.79, "eval_loss": 0.0051603252068161964, "eval_runtime": 2.1621, "eval_samples_per_second": 462.523, "eval_steps_per_second": 57.815, "step": 12600 }, { "epoch": 7.91, "learning_rate": 6.291225886745488e-05, "loss": 0.01, "step": 12800 }, { "epoch": 7.91, "eval_loss": 0.005722519941627979, "eval_runtime": 2.7684, "eval_samples_per_second": 361.216, "eval_steps_per_second": 45.152, "step": 12800 }, { "epoch": 8.04, "learning_rate": 5.917859365276913e-05, "loss": 0.0071, "step": 13000 }, { "epoch": 8.04, "eval_loss": 0.004564732778817415, "eval_runtime": 2.7551, "eval_samples_per_second": 362.961, "eval_steps_per_second": 45.37, "step": 13000 }, { "epoch": 8.16, "learning_rate": 5.5444928438083385e-05, "loss": 0.0065, "step": 13200 }, { "epoch": 8.16, "eval_loss": 0.004461783915758133, "eval_runtime": 3.1705, "eval_samples_per_second": 315.412, "eval_steps_per_second": 39.426, "step": 13200 }, { "epoch": 8.29, "learning_rate": 5.171126322339763e-05, "loss": 0.0075, "step": 13400 }, { "epoch": 8.29, "eval_loss": 0.004132562782615423, "eval_runtime": 3.5027, "eval_samples_per_second": 285.498, "eval_steps_per_second": 35.687, "step": 13400 }, { "epoch": 8.41, "learning_rate": 4.797759800871188e-05, "loss": 0.0072, "step": 13600 }, { "epoch": 8.41, "eval_loss": 0.004298557061702013, "eval_runtime": 2.1516, "eval_samples_per_second": 464.775, "eval_steps_per_second": 58.097, "step": 13600 }, { "epoch": 8.53, "learning_rate": 4.424393279402613e-05, "loss": 0.0077, "step": 13800 }, { "epoch": 8.53, "eval_loss": 0.005747557617723942, "eval_runtime": 2.1174, "eval_samples_per_second": 472.272, "eval_steps_per_second": 59.034, "step": 13800 }, { "epoch": 8.66, "learning_rate": 4.051026757934038e-05, "loss": 0.009, "step": 14000 }, { "epoch": 8.66, "eval_loss": 0.005076244939118624, "eval_runtime": 2.1715, "eval_samples_per_second": 460.514, "eval_steps_per_second": 57.564, "step": 14000 }, { "epoch": 8.78, "learning_rate": 3.677660236465463e-05, "loss": 0.0066, "step": 14200 }, { "epoch": 8.78, "eval_loss": 0.004328867886215448, "eval_runtime": 2.1457, "eval_samples_per_second": 466.038, "eval_steps_per_second": 58.255, "step": 14200 }, { "epoch": 8.9, "learning_rate": 3.304293714996888e-05, "loss": 0.0065, "step": 14400 }, { "epoch": 8.9, "eval_loss": 0.004579309374094009, "eval_runtime": 2.5023, "eval_samples_per_second": 399.626, "eval_steps_per_second": 49.953, "step": 14400 }, { "epoch": 9.03, "learning_rate": 2.9309271935283136e-05, "loss": 0.0047, "step": 14600 }, { "epoch": 9.03, "eval_loss": 0.00406376738101244, "eval_runtime": 3.0193, "eval_samples_per_second": 331.204, "eval_steps_per_second": 41.401, "step": 14600 }, { "epoch": 9.15, "learning_rate": 2.5575606720597382e-05, "loss": 0.0049, "step": 14800 }, { "epoch": 9.15, "eval_loss": 0.0037133253645151854, "eval_runtime": 2.5419, "eval_samples_per_second": 393.406, "eval_steps_per_second": 49.176, "step": 14800 }, { "epoch": 9.28, "learning_rate": 2.1841941505911635e-05, "loss": 0.0048, "step": 15000 }, { "epoch": 9.28, "eval_loss": 0.0035180081613361835, "eval_runtime": 2.1535, "eval_samples_per_second": 464.362, "eval_steps_per_second": 58.045, "step": 15000 }, { "epoch": 9.4, "learning_rate": 1.8108276291225884e-05, "loss": 0.0045, "step": 15200 }, { "epoch": 9.4, "eval_loss": 0.0041992985643446445, "eval_runtime": 2.1652, "eval_samples_per_second": 461.858, "eval_steps_per_second": 57.732, "step": 15200 }, { "epoch": 9.52, "learning_rate": 1.4374611076540135e-05, "loss": 0.0041, "step": 15400 }, { "epoch": 9.52, "eval_loss": 0.003915323410183191, "eval_runtime": 2.7057, "eval_samples_per_second": 369.59, "eval_steps_per_second": 46.199, "step": 15400 }, { "epoch": 9.65, "learning_rate": 1.0640945861854385e-05, "loss": 0.0042, "step": 15600 }, { "epoch": 9.65, "eval_loss": 0.0032798268366605043, "eval_runtime": 3.0263, "eval_samples_per_second": 330.438, "eval_steps_per_second": 41.305, "step": 15600 }, { "epoch": 9.77, "learning_rate": 6.907280647168636e-06, "loss": 0.0041, "step": 15800 }, { "epoch": 9.77, "eval_loss": 0.003197046695277095, "eval_runtime": 2.2279, "eval_samples_per_second": 448.855, "eval_steps_per_second": 56.107, "step": 15800 }, { "epoch": 9.89, "learning_rate": 3.173615432482887e-06, "loss": 0.0039, "step": 16000 }, { "epoch": 9.89, "eval_loss": 0.003054018598049879, "eval_runtime": 2.2116, "eval_samples_per_second": 452.155, "eval_steps_per_second": 56.519, "step": 16000 } ], "logging_steps": 200, "max_steps": 16170, "num_train_epochs": 10, "save_steps": 200, "total_flos": 6146864391499776.0, "trial_name": null, "trial_params": null }