{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 732, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 1.9963183634476757e-05, "loss": 1.9326, "step": 20 }, { "epoch": 0.11, "learning_rate": 1.985300562686109e-05, "loss": 1.8276, "step": 40 }, { "epoch": 0.16, "learning_rate": 1.9670277247913205e-05, "loss": 1.8392, "step": 60 }, { "epoch": 0.22, "learning_rate": 1.941634397659126e-05, "loss": 1.3092, "step": 80 }, { "epoch": 0.27, "learning_rate": 1.909307559292236e-05, "loss": 1.1181, "step": 100 }, { "epoch": 0.33, "learning_rate": 1.8702852410301556e-05, "loss": 1.2467, "step": 120 }, { "epoch": 0.38, "learning_rate": 1.8248547748594246e-05, "loss": 1.0989, "step": 140 }, { "epoch": 0.44, "learning_rate": 1.77335067770973e-05, "loss": 1.1427, "step": 160 }, { "epoch": 0.49, "learning_rate": 1.7161521883143936e-05, "loss": 1.1778, "step": 180 }, { "epoch": 0.55, "learning_rate": 1.653680474772006e-05, "loss": 1.0765, "step": 200 }, { "epoch": 0.6, "learning_rate": 1.586395533370696e-05, "loss": 1.0176, "step": 220 }, { "epoch": 0.66, "learning_rate": 1.5147928015098309e-05, "loss": 1.1858, "step": 240 }, { "epoch": 0.71, "learning_rate": 1.4393995096591415e-05, "loss": 1.1799, "step": 260 }, { "epoch": 0.77, "learning_rate": 1.3607707992167836e-05, "loss": 1.1227, "step": 280 }, { "epoch": 0.82, "learning_rate": 1.2794856348516095e-05, "loss": 1.0963, "step": 300 }, { "epoch": 0.87, "learning_rate": 1.196142541428197e-05, "loss": 1.0744, "step": 320 }, { "epoch": 0.93, "learning_rate": 1.1113551969048088e-05, "loss": 1.1303, "step": 340 }, { "epoch": 0.98, "learning_rate": 1.0257479136549889e-05, "loss": 1.1179, "step": 360 }, { "epoch": 1.04, "learning_rate": 9.399510414850518e-06, "loss": 1.0918, "step": 380 }, { "epoch": 1.09, "learning_rate": 8.545963261963102e-06, "loss": 1.0483, "step": 400 }, { "epoch": 1.15, "learning_rate": 7.703122578682047e-06, "loss": 1.2234, "step": 420 }, { "epoch": 1.2, "learning_rate": 6.877194431142055e-06, "loss": 1.1311, "step": 440 }, { "epoch": 1.26, "learning_rate": 6.074260353858283e-06, "loss": 1.1261, "step": 460 }, { "epoch": 1.31, "learning_rate": 5.300232569726805e-06, "loss": 1.1854, "step": 480 }, { "epoch": 1.37, "learning_rate": 4.560810456712754e-06, "loss": 1.1388, "step": 500 }, { "epoch": 1.42, "learning_rate": 3.86143858177388e-06, "loss": 1.0478, "step": 520 }, { "epoch": 1.48, "learning_rate": 3.207266611027069e-06, "loss": 1.1229, "step": 540 }, { "epoch": 1.53, "learning_rate": 2.6031113913503337e-06, "loss": 0.9668, "step": 560 }, { "epoch": 1.58, "learning_rate": 2.0534214826237486e-06, "loss": 1.1179, "step": 580 }, { "epoch": 1.64, "learning_rate": 1.5622444017681438e-06, "loss": 1.0696, "step": 600 }, { "epoch": 1.69, "learning_rate": 1.1331968197725985e-06, "loss": 1.055, "step": 620 }, { "epoch": 1.75, "learning_rate": 7.694379311582401e-07, "loss": 1.058, "step": 640 }, { "epoch": 1.8, "learning_rate": 4.73646191966175e-07, "loss": 1.0865, "step": 660 }, { "epoch": 1.86, "learning_rate": 2.479995975541749e-07, "loss": 0.9857, "step": 680 }, { "epoch": 1.91, "learning_rate": 9.415964542203059e-08, "loss": 1.1078, "step": 700 }, { "epoch": 1.97, "learning_rate": 1.325910115169471e-08, "loss": 1.1117, "step": 720 }, { "epoch": 2.0, "step": 732, "total_flos": 2.256724490932224e+16, "train_loss": 1.1766422537506604, "train_runtime": 7172.2389, "train_samples_per_second": 0.204, "train_steps_per_second": 0.102 } ], "logging_steps": 20, "max_steps": 732, "num_train_epochs": 2, "save_steps": 20, "total_flos": 2.256724490932224e+16, "trial_name": null, "trial_params": null }