{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "global_step": 6144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.98, "learning_rate": 4.755859375e-05, "loss": 228.0322, "step": 500 }, { "epoch": 1.0, "eval_loss": 95.51275634765625, "eval_runtime": 19.2239, "eval_samples_per_second": 84.738, "eval_steps_per_second": 5.306, "step": 512 }, { "epoch": 1.95, "learning_rate": 4.5117187500000005e-05, "loss": 102.3494, "step": 1000 }, { "epoch": 2.0, "eval_loss": 94.07129669189453, "eval_runtime": 19.254, "eval_samples_per_second": 84.606, "eval_steps_per_second": 5.298, "step": 1024 }, { "epoch": 2.93, "learning_rate": 4.267578125e-05, "loss": 90.6642, "step": 1500 }, { "epoch": 3.0, "eval_loss": 83.28936767578125, "eval_runtime": 19.2397, "eval_samples_per_second": 84.669, "eval_steps_per_second": 5.302, "step": 1536 }, { "epoch": 3.91, "learning_rate": 4.0234375e-05, "loss": 78.5213, "step": 2000 }, { "epoch": 4.0, "eval_loss": 83.8617172241211, "eval_runtime": 19.2535, "eval_samples_per_second": 84.608, "eval_steps_per_second": 5.298, "step": 2048 }, { "epoch": 4.88, "learning_rate": 3.7792968750000005e-05, "loss": 70.4173, "step": 2500 }, { "epoch": 5.0, "eval_loss": 79.97139739990234, "eval_runtime": 19.2503, "eval_samples_per_second": 84.622, "eval_steps_per_second": 5.299, "step": 2560 }, { "epoch": 5.86, "learning_rate": 3.53515625e-05, "loss": 63.1215, "step": 3000 }, { "epoch": 6.0, "eval_loss": 81.74008178710938, "eval_runtime": 19.2347, "eval_samples_per_second": 84.69, "eval_steps_per_second": 5.303, "step": 3072 }, { "epoch": 6.84, "learning_rate": 3.291015625e-05, "loss": 55.0817, "step": 3500 }, { "epoch": 7.0, "eval_loss": 80.47997283935547, "eval_runtime": 19.239, "eval_samples_per_second": 84.672, "eval_steps_per_second": 5.302, "step": 3584 }, { "epoch": 7.81, "learning_rate": 3.0468750000000002e-05, "loss": 49.4482, "step": 4000 }, { "epoch": 8.0, "eval_loss": 78.9246597290039, "eval_runtime": 19.2369, "eval_samples_per_second": 84.681, "eval_steps_per_second": 5.302, "step": 4096 }, { "epoch": 8.79, "learning_rate": 2.802734375e-05, "loss": 45.7534, "step": 4500 }, { "epoch": 9.0, "eval_loss": 79.2200927734375, "eval_runtime": 19.2428, "eval_samples_per_second": 84.655, "eval_steps_per_second": 5.301, "step": 4608 }, { "epoch": 9.77, "learning_rate": 2.55859375e-05, "loss": 43.1915, "step": 5000 }, { "epoch": 10.0, "eval_loss": 81.24749755859375, "eval_runtime": 19.2441, "eval_samples_per_second": 84.649, "eval_steps_per_second": 5.3, "step": 5120 }, { "epoch": 10.74, "learning_rate": 2.3144531250000002e-05, "loss": 41.0389, "step": 5500 }, { "epoch": 11.0, "eval_loss": 78.5303955078125, "eval_runtime": 19.2504, "eval_samples_per_second": 84.622, "eval_steps_per_second": 5.299, "step": 5632 }, { "epoch": 11.72, "learning_rate": 2.0703125e-05, "loss": 38.4756, "step": 6000 }, { "epoch": 12.0, "eval_loss": 77.77259826660156, "eval_runtime": 19.2175, "eval_samples_per_second": 84.766, "eval_steps_per_second": 5.308, "step": 6144 } ], "max_steps": 10240, "num_train_epochs": 20, "total_flos": 2.582451944655667e+16, "trial_name": null, "trial_params": null }