{ "best_metric": 1.652644395828247, "best_model_checkpoint": "output/nirvana/checkpoint-144", "epoch": 4.0, "global_step": 144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.00013040646433810595, "loss": 2.8877, "step": 5 }, { "epoch": 0.29, "learning_rate": 0.00011137140040750913, "loss": 2.7784, "step": 10 }, { "epoch": 0.43, "learning_rate": 8.386493606940316e-05, "loss": 2.2959, "step": 15 }, { "epoch": 0.57, "learning_rate": 5.3335063930596836e-05, "loss": 2.5076, "step": 20 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.4917, "step": 25 }, { "epoch": 0.86, "learning_rate": 6.793535661894054e-06, "loss": 2.4772, "step": 30 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 2.6382, "step": 35 }, { "epoch": 1.0, "eval_loss": 2.5190556049346924, "eval_runtime": 2.4884, "eval_samples_per_second": 22.504, "eval_steps_per_second": 2.813, "step": 35 }, { "epoch": 1.0, "eval_loss": 2.3264973163604736, "eval_runtime": 1.1647, "eval_samples_per_second": 45.506, "eval_steps_per_second": 6.01, "step": 36 }, { "epoch": 1.11, "learning_rate": 4.137086214086682e-06, "loss": 2.6774, "step": 40 }, { "epoch": 1.25, "learning_rate": 2.009247481060283e-05, "loss": 2.4577, "step": 45 }, { "epoch": 1.39, "learning_rate": 4.513741816785908e-05, "loss": 2.2823, "step": 50 }, { "epoch": 1.53, "learning_rate": 7.457888395248933e-05, "loss": 2.298, "step": 55 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.4088, "step": 60 }, { "epoch": 1.81, "learning_rate": 0.00012479383023822482, "loss": 2.2944, "step": 65 }, { "epoch": 1.94, "learning_rate": 0.00013615781185663748, "loss": 2.0032, "step": 70 }, { "epoch": 2.0, "eval_loss": 2.128573179244995, "eval_runtime": 1.1938, "eval_samples_per_second": 44.396, "eval_steps_per_second": 5.864, "step": 72 }, { "epoch": 2.0, "eval_loss": 1.8826501369476318, "eval_runtime": 1.8444, "eval_samples_per_second": 22.772, "eval_steps_per_second": 3.253, "step": 74 }, { "epoch": 2.03, "learning_rate": 0.0001369528677140173, "loss": 2.0939, "step": 75 }, { "epoch": 2.16, "learning_rate": 0.00012848876816285752, "loss": 2.1654, "step": 80 }, { "epoch": 2.3, "learning_rate": 0.00010939183589447406, "loss": 2.0452, "step": 85 }, { "epoch": 2.43, "learning_rate": 8.305259792170682e-05, "loss": 1.7357, "step": 90 }, { "epoch": 2.57, "learning_rate": 5.414740207829316e-05, "loss": 2.0788, "step": 95 }, { "epoch": 2.7, "learning_rate": 2.7808164105525978e-05, "loss": 1.928, "step": 100 }, { "epoch": 2.84, "learning_rate": 8.711231837142545e-06, "loss": 1.9376, "step": 105 }, { "epoch": 2.97, "learning_rate": 2.4713228598268823e-07, "loss": 1.9652, "step": 110 }, { "epoch": 3.0, "eval_loss": 1.7358663082122803, "eval_runtime": 1.8478, "eval_samples_per_second": 22.729, "eval_steps_per_second": 3.247, "step": 111 }, { "epoch": 3.19, "learning_rate": 1.2406169761775193e-05, "loss": 1.9469, "step": 115 }, { "epoch": 3.33, "learning_rate": 3.4300000000000014e-05, "loss": 1.7381, "step": 120 }, { "epoch": 3.47, "learning_rate": 6.262111604751063e-05, "loss": 1.8423, "step": 125 }, { "epoch": 3.61, "learning_rate": 9.206258183214083e-05, "loss": 1.6384, "step": 130 }, { "epoch": 3.75, "learning_rate": 0.0001171075251893971, "loss": 2.0094, "step": 135 }, { "epoch": 3.89, "learning_rate": 0.00013306291378591332, "loss": 1.8349, "step": 140 }, { "epoch": 4.0, "eval_loss": 1.652644395828247, "eval_runtime": 0.6674, "eval_samples_per_second": 74.92, "eval_steps_per_second": 10.489, "step": 144 } ], "max_steps": 144, "num_train_epochs": 4, "total_flos": 146715475968000.0, "trial_name": null, "trial_params": null }