{ "best_metric": 3.6241421699523926, "best_model_checkpoint": "output/gunna/checkpoint-160", "epoch": 1.0, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0001368696722497127, "loss": 4.356, "step": 5 }, { "epoch": 0.06, "learning_rate": 0.0001358818702356616, "loss": 4.1179, "step": 10 }, { "epoch": 0.09, "learning_rate": 0.00013424610703122953, "loss": 4.0264, "step": 15 }, { "epoch": 0.12, "learning_rate": 0.00013197813593027427, "loss": 4.0528, "step": 20 }, { "epoch": 0.16, "learning_rate": 0.00012909979873429716, "loss": 3.9366, "step": 25 }, { "epoch": 0.19, "learning_rate": 0.0001256388154039546, "loss": 3.8881, "step": 30 }, { "epoch": 0.22, "learning_rate": 0.00012162851710068375, "loss": 3.7806, "step": 35 }, { "epoch": 0.25, "learning_rate": 0.00011710752518939715, "loss": 3.7294, "step": 40 }, { "epoch": 0.28, "learning_rate": 0.00011211937929362608, "loss": 3.7043, "step": 45 }, { "epoch": 0.31, "learning_rate": 0.00010671211798514472, "loss": 3.7065, "step": 50 }, { "epoch": 0.34, "learning_rate": 0.00010093781614626346, "loss": 3.5885, "step": 55 }, { "epoch": 0.38, "learning_rate": 9.485208346024516e-05, "loss": 3.5777, "step": 60 }, { "epoch": 0.41, "learning_rate": 8.851352885965611e-05, "loss": 3.6253, "step": 65 }, { "epoch": 0.44, "learning_rate": 8.19831960903064e-05, "loss": 3.6754, "step": 70 }, { "epoch": 0.47, "learning_rate": 7.532397582660788e-05, "loss": 3.7343, "step": 75 }, { "epoch": 0.5, "learning_rate": 6.86e-05, "loss": 3.6731, "step": 80 }, { "epoch": 0.53, "learning_rate": 6.187602417339214e-05, "loss": 3.5158, "step": 85 }, { "epoch": 0.56, "learning_rate": 5.52168039096936e-05, "loss": 3.4779, "step": 90 }, { "epoch": 0.59, "learning_rate": 4.8686471140343896e-05, "loss": 3.7252, "step": 95 }, { "epoch": 0.62, "learning_rate": 4.2347916539754844e-05, "loss": 3.5483, "step": 100 }, { "epoch": 0.66, "learning_rate": 3.6262183853736556e-05, "loss": 3.6806, "step": 105 }, { "epoch": 0.69, "learning_rate": 3.0487882014855305e-05, "loss": 3.5955, "step": 110 }, { "epoch": 0.72, "learning_rate": 2.5080620706373927e-05, "loss": 3.5443, "step": 115 }, { "epoch": 0.75, "learning_rate": 2.0092474810602843e-05, "loss": 3.5015, "step": 120 }, { "epoch": 0.78, "learning_rate": 1.557148289931624e-05, "loss": 3.4182, "step": 125 }, { "epoch": 0.81, "learning_rate": 1.1561184596045389e-05, "loss": 3.5311, "step": 130 }, { "epoch": 0.84, "learning_rate": 8.100201265702852e-06, "loss": 3.6302, "step": 135 }, { "epoch": 0.88, "learning_rate": 5.22186406972573e-06, "loss": 3.5087, "step": 140 }, { "epoch": 0.91, "learning_rate": 2.953892968770475e-06, "loss": 3.5036, "step": 145 }, { "epoch": 0.94, "learning_rate": 1.3181297643383925e-06, "loss": 3.4784, "step": 150 }, { "epoch": 0.97, "learning_rate": 3.303277502872983e-07, "loss": 3.2405, "step": 155 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 3.4995, "step": 160 }, { "epoch": 1.0, "eval_loss": 3.6241421699523926, "eval_runtime": 9.1367, "eval_samples_per_second": 23.203, "eval_steps_per_second": 2.955, "step": 160 } ], "max_steps": 160, "num_train_epochs": 1, "total_flos": 166443024384000.0, "trial_name": null, "trial_params": null }