{ "best_metric": 1.5952510833740234, "best_model_checkpoint": "output/egor-letov/checkpoint-162", "epoch": 1.0, "global_step": 162, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00013687777179141117, "loss": 2.4061, "step": 5 }, { "epoch": 0.06, "learning_rate": 0.00013591411430903858, "loss": 2.0285, "step": 10 }, { "epoch": 0.09, "learning_rate": 0.00013431808054484255, "loss": 2.1022, "step": 15 }, { "epoch": 0.12, "learning_rate": 0.0001321046642918459, "loss": 2.0273, "step": 20 }, { "epoch": 0.15, "learning_rate": 0.00012929465928631872, "loss": 2.0219, "step": 25 }, { "epoch": 0.19, "learning_rate": 0.00012591446386292745, "loss": 1.8839, "step": 30 }, { "epoch": 0.22, "learning_rate": 0.00012199583295799738, "loss": 1.8914, "step": 35 }, { "epoch": 0.25, "learning_rate": 0.00011757557979067189, "loss": 1.9117, "step": 40 }, { "epoch": 0.28, "learning_rate": 0.00011269523002449659, "loss": 1.7911, "step": 45 }, { "epoch": 0.31, "learning_rate": 0.00010740063165837655, "loss": 1.883, "step": 50 }, { "epoch": 0.34, "learning_rate": 0.0001017415243117494, "loss": 1.8814, "step": 55 }, { "epoch": 0.37, "learning_rate": 9.577107195028616e-05, "loss": 1.8308, "step": 60 }, { "epoch": 0.4, "learning_rate": 8.954536344188477e-05, "loss": 1.8842, "step": 65 }, { "epoch": 0.43, "learning_rate": 8.312288563493759e-05, "loss": 1.7583, "step": 70 }, { "epoch": 0.46, "learning_rate": 7.65639739089908e-05, "loss": 1.9336, "step": 75 }, { "epoch": 0.49, "learning_rate": 6.993024535954715e-05, "loss": 1.8095, "step": 80 }, { "epoch": 0.52, "learning_rate": 6.328401994190555e-05, "loss": 1.665, "step": 85 }, { "epoch": 0.56, "learning_rate": 5.668773501204858e-05, "loss": 1.7316, "step": 90 }, { "epoch": 0.59, "learning_rate": 5.0203358764620266e-05, "loss": 1.8011, "step": 95 }, { "epoch": 0.62, "learning_rate": 4.389180807839688e-05, "loss": 1.6939, "step": 100 }, { "epoch": 0.65, "learning_rate": 3.7812376238248296e-05, "loss": 1.7563, "step": 105 }, { "epoch": 0.68, "learning_rate": 3.2022175909803996e-05, "loss": 1.6453, "step": 110 }, { "epoch": 0.71, "learning_rate": 2.6575602599746285e-05, "loss": 1.7147, "step": 115 }, { "epoch": 0.74, "learning_rate": 2.152382364220488e-05, "loss": 1.7494, "step": 120 }, { "epoch": 0.77, "learning_rate": 1.6914297511922475e-05, "loss": 1.8658, "step": 125 }, { "epoch": 0.8, "learning_rate": 1.279032797996043e-05, "loss": 1.7932, "step": 130 }, { "epoch": 0.83, "learning_rate": 9.190657300387505e-06, "loss": 1.7533, "step": 135 }, { "epoch": 0.86, "learning_rate": 6.149102249722346e-06, "loss": 1.762, "step": 140 }, { "epoch": 0.9, "learning_rate": 3.694236438323532e-06, "loss": 1.6914, "step": 145 }, { "epoch": 0.93, "learning_rate": 1.849121878224087e-06, "loss": 1.6333, "step": 150 }, { "epoch": 0.96, "learning_rate": 6.310923291716113e-07, "loss": 1.7231, "step": 155 }, { "epoch": 0.99, "learning_rate": 5.1590458209819155e-08, "loss": 1.6988, "step": 160 }, { "epoch": 1.0, "eval_loss": 1.5952510833740234, "eval_runtime": 10.0961, "eval_samples_per_second": 22.286, "eval_steps_per_second": 2.872, "step": 162 } ], "max_steps": 162, "num_train_epochs": 1, "total_flos": 168402714624000.0, "trial_name": null, "trial_params": null }