{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.995722277064824, "global_step": 569, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0, "loss": 1.6786, "step": 1 }, { "epoch": 0.05, "learning_rate": 9.998088142969587e-06, "loss": 1.64, "step": 10 }, { "epoch": 0.11, "learning_rate": 9.985017573980262e-06, "loss": 1.06, "step": 20 }, { "epoch": 0.16, "learning_rate": 9.956012654497073e-06, "loss": 0.486, "step": 30 }, { "epoch": 0.21, "learning_rate": 9.911850333228427e-06, "loss": 0.2374, "step": 40 }, { "epoch": 0.26, "learning_rate": 9.865696363132769e-06, "loss": 0.1837, "step": 50 }, { "epoch": 0.32, "learning_rate": 9.7946219754852e-06, "loss": 0.1587, "step": 60 }, { "epoch": 0.37, "learning_rate": 9.708883781112711e-06, "loss": 0.1517, "step": 70 }, { "epoch": 0.42, "learning_rate": 9.629910009876223e-06, "loss": 0.1553, "step": 80 }, { "epoch": 0.47, "learning_rate": 9.51846738818602e-06, "loss": 0.1469, "step": 90 }, { "epoch": 0.53, "learning_rate": 9.39320554720971e-06, "loss": 0.145, "step": 100 }, { "epoch": 0.53, "eval_loss": 0.05455470830202103, "eval_runtime": 85.5684, "eval_samples_per_second": 11.383, "eval_steps_per_second": 1.426, "step": 100 }, { "epoch": 0.58, "learning_rate": 9.297503309182422e-06, "loss": 0.1549, "step": 110 }, { "epoch": 0.63, "learning_rate": 9.149650063920841e-06, "loss": 0.1435, "step": 120 }, { "epoch": 0.68, "learning_rate": 8.989105585268073e-06, "loss": 0.1414, "step": 130 }, { "epoch": 0.74, "learning_rate": 8.816360880276967e-06, "loss": 0.1421, "step": 140 }, { "epoch": 0.79, "learning_rate": 8.631944269006895e-06, "loss": 0.1391, "step": 150 }, { "epoch": 0.84, "learning_rate": 8.436419768716853e-06, "loss": 0.1386, "step": 160 }, { "epoch": 0.9, "learning_rate": 8.230385368882732e-06, "loss": 0.1382, "step": 170 }, { "epoch": 0.95, "learning_rate": 8.014471202314443e-06, "loss": 0.1382, "step": 180 }, { "epoch": 1.0, "learning_rate": 7.789337617966275e-06, "loss": 0.1357, "step": 190 }, { "epoch": 1.05, "learning_rate": 7.5794033237905e-06, "loss": 0.1408, "step": 200 }, { "epoch": 1.05, "eval_loss": 0.048922911286354065, "eval_runtime": 85.5902, "eval_samples_per_second": 11.38, "eval_steps_per_second": 1.425, "step": 200 }, { "epoch": 1.11, "learning_rate": 7.338671397287409e-06, "loss": 0.1341, "step": 210 }, { "epoch": 1.16, "learning_rate": 7.1158757783214904e-06, "loss": 0.1391, "step": 220 }, { "epoch": 1.21, "learning_rate": 6.862201608610134e-06, "loss": 0.1343, "step": 230 }, { "epoch": 1.26, "learning_rate": 6.602832106793113e-06, "loss": 0.1316, "step": 240 }, { "epoch": 1.32, "learning_rate": 6.338560525031794e-06, "loss": 0.1316, "step": 250 }, { "epoch": 1.37, "learning_rate": 6.0701951079422615e-06, "loss": 0.1313, "step": 260 }, { "epoch": 1.42, "learning_rate": 5.825844147403353e-06, "loss": 0.1347, "step": 270 }, { "epoch": 1.47, "learning_rate": 5.579447229838992e-06, "loss": 0.1337, "step": 280 }, { "epoch": 1.53, "learning_rate": 5.3040158758857886e-06, "loss": 0.1315, "step": 290 }, { "epoch": 1.58, "learning_rate": 5.027654723907197e-06, "loss": 0.1309, "step": 300 }, { "epoch": 1.58, "eval_loss": 0.04729650914669037, "eval_runtime": 85.6206, "eval_samples_per_second": 11.376, "eval_steps_per_second": 1.425, "step": 300 }, { "epoch": 1.63, "learning_rate": 4.751208993096637e-06, "loss": 0.1307, "step": 310 }, { "epoch": 1.68, "learning_rate": 4.475524161322288e-06, "loss": 0.1305, "step": 320 }, { "epoch": 1.74, "learning_rate": 4.2014433793290435e-06, "loss": 0.1298, "step": 330 }, { "epoch": 1.79, "learning_rate": 3.92980489205774e-06, "loss": 0.1308, "step": 340 }, { "epoch": 1.84, "learning_rate": 3.6614394749682057e-06, "loss": 0.13, "step": 350 }, { "epoch": 1.9, "learning_rate": 3.3971678932068875e-06, "loss": 0.1296, "step": 360 }, { "epoch": 1.95, "learning_rate": 3.1377983913898673e-06, "loss": 0.1289, "step": 370 }, { "epoch": 2.0, "learning_rate": 2.8841242216785116e-06, "loss": 0.129, "step": 380 }, { "epoch": 2.05, "learning_rate": 2.6369212177078306e-06, "loss": 0.1291, "step": 390 }, { "epoch": 2.11, "learning_rate": 2.3969454217874325e-06, "loss": 0.1277, "step": 400 }, { "epoch": 2.11, "eval_loss": 0.046792980283498764, "eval_runtime": 85.593, "eval_samples_per_second": 11.379, "eval_steps_per_second": 1.425, "step": 400 }, { "epoch": 2.16, "learning_rate": 2.164930772631996e-06, "loss": 0.1276, "step": 410 }, { "epoch": 2.21, "learning_rate": 1.94158686069306e-06, "loss": 0.1275, "step": 420 }, { "epoch": 2.26, "learning_rate": 1.7275967579572427e-06, "loss": 0.1296, "step": 430 }, { "epoch": 2.32, "learning_rate": 1.5236149288481428e-06, "loss": 0.1275, "step": 440 }, { "epoch": 2.37, "learning_rate": 1.3302652286212397e-06, "loss": 0.1287, "step": 450 }, { "epoch": 2.42, "learning_rate": 1.148138995373459e-06, "loss": 0.1267, "step": 460 }, { "epoch": 2.47, "learning_rate": 9.777932415027608e-07, "loss": 0.1282, "step": 470 }, { "epoch": 2.53, "learning_rate": 8.197489501489924e-07, "loss": 0.1249, "step": 480 }, { "epoch": 2.58, "learning_rate": 6.744894818261311e-07, "loss": 0.127, "step": 490 }, { "epoch": 2.63, "learning_rate": 5.424590961190474e-07, "loss": 0.1253, "step": 500 }, { "epoch": 2.63, "eval_loss": 0.046507786959409714, "eval_runtime": 85.5909, "eval_samples_per_second": 11.38, "eval_steps_per_second": 1.425, "step": 500 }, { "epoch": 2.69, "learning_rate": 4.240615929660341e-07, "loss": 0.1279, "step": 510 }, { "epoch": 2.74, "learning_rate": 3.1965907768255035e-07, "loss": 0.128, "step": 520 }, { "epoch": 2.79, "learning_rate": 2.2957085350325092e-07, "loss": 0.1288, "step": 530 }, { "epoch": 2.84, "learning_rate": 1.540724450293035e-07, "loss": 0.1264, "step": 540 }, { "epoch": 2.9, "learning_rate": 9.339475556770006e-08, "loss": 0.1274, "step": 550 }, { "epoch": 2.95, "learning_rate": 4.77233609397082e-08, "loss": 0.1277, "step": 560 }, { "epoch": 3.0, "step": 569, "total_flos": 1.6024405879092675e+18, "train_loss": 0.18567189055712655, "train_runtime": 15471.5941, "train_samples_per_second": 4.707, "train_steps_per_second": 0.037 } ], "max_steps": 569, "num_train_epochs": 4, "total_flos": 1.6024405879092675e+18, "trial_name": null, "trial_params": null }