{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.98463901689708, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "learning_rate": 1.923195084485407e-05, "loss": 2.0051, "step": 500 }, { "epoch": 0.77, "learning_rate": 1.8463901689708145e-05, "loss": 1.6247, "step": 1000 }, { "epoch": 1.0, "eval_loss": 1.3888643980026245, "eval_runtime": 7.9545, "eval_samples_per_second": 653.846, "eval_steps_per_second": 81.841, "step": 1302 }, { "epoch": 1.15, "learning_rate": 1.7695852534562215e-05, "loss": 1.4928, "step": 1500 }, { "epoch": 1.54, "learning_rate": 1.6927803379416285e-05, "loss": 1.3947, "step": 2000 }, { "epoch": 1.92, "learning_rate": 1.6159754224270355e-05, "loss": 1.3445, "step": 2500 }, { "epoch": 2.0, "eval_loss": 1.201552152633667, "eval_runtime": 8.1327, "eval_samples_per_second": 639.513, "eval_steps_per_second": 80.047, "step": 2604 }, { "epoch": 2.3, "learning_rate": 1.5391705069124425e-05, "loss": 1.2667, "step": 3000 }, { "epoch": 2.69, "learning_rate": 1.4623655913978497e-05, "loss": 1.2231, "step": 3500 }, { "epoch": 3.0, "eval_loss": 1.0806881189346313, "eval_runtime": 7.9883, "eval_samples_per_second": 651.08, "eval_steps_per_second": 81.495, "step": 3906 }, { "epoch": 3.07, "learning_rate": 1.3855606758832567e-05, "loss": 1.2226, "step": 4000 }, { "epoch": 3.46, "learning_rate": 1.3087557603686638e-05, "loss": 1.1666, "step": 4500 }, { "epoch": 3.84, "learning_rate": 1.2319508448540707e-05, "loss": 1.1263, "step": 5000 }, { "epoch": 4.0, "eval_loss": 1.024440050125122, "eval_runtime": 7.9462, "eval_samples_per_second": 654.524, "eval_steps_per_second": 81.926, "step": 5208 }, { "epoch": 4.22, "learning_rate": 1.1551459293394778e-05, "loss": 1.1107, "step": 5500 }, { "epoch": 4.61, "learning_rate": 1.0783410138248848e-05, "loss": 1.0675, "step": 6000 }, { "epoch": 4.99, "learning_rate": 1.001536098310292e-05, "loss": 1.062, "step": 6500 }, { "epoch": 5.0, "eval_loss": 0.9445285797119141, "eval_runtime": 8.0272, "eval_samples_per_second": 647.921, "eval_steps_per_second": 81.099, "step": 6510 }, { "epoch": 5.38, "learning_rate": 9.24731182795699e-06, "loss": 1.03, "step": 7000 }, { "epoch": 5.76, "learning_rate": 8.47926267281106e-06, "loss": 1.0055, "step": 7500 }, { "epoch": 6.0, "eval_loss": 0.9380741715431213, "eval_runtime": 8.0529, "eval_samples_per_second": 645.851, "eval_steps_per_second": 80.84, "step": 7812 }, { "epoch": 6.14, "learning_rate": 7.711213517665132e-06, "loss": 0.9869, "step": 8000 }, { "epoch": 6.53, "learning_rate": 6.9431643625192015e-06, "loss": 1.0125, "step": 8500 }, { "epoch": 6.91, "learning_rate": 6.175115207373272e-06, "loss": 1.004, "step": 9000 }, { "epoch": 7.0, "eval_loss": 0.902702271938324, "eval_runtime": 8.0927, "eval_samples_per_second": 642.681, "eval_steps_per_second": 80.443, "step": 9114 }, { "epoch": 7.3, "learning_rate": 5.407066052227343e-06, "loss": 0.9893, "step": 9500 }, { "epoch": 7.68, "learning_rate": 4.639016897081414e-06, "loss": 0.9525, "step": 10000 }, { "epoch": 8.0, "eval_loss": 0.8645159006118774, "eval_runtime": 8.0556, "eval_samples_per_second": 645.641, "eval_steps_per_second": 80.814, "step": 10416 }, { "epoch": 8.06, "learning_rate": 3.870967741935484e-06, "loss": 0.9761, "step": 10500 }, { "epoch": 8.45, "learning_rate": 3.1029185867895553e-06, "loss": 0.9469, "step": 11000 }, { "epoch": 8.83, "learning_rate": 2.3348694316436257e-06, "loss": 0.9166, "step": 11500 }, { "epoch": 9.0, "eval_loss": 0.8442177176475525, "eval_runtime": 8.0728, "eval_samples_per_second": 644.264, "eval_steps_per_second": 80.641, "step": 11718 }, { "epoch": 9.22, "learning_rate": 1.5668202764976959e-06, "loss": 0.9201, "step": 12000 }, { "epoch": 9.6, "learning_rate": 7.987711213517666e-07, "loss": 0.9143, "step": 12500 }, { "epoch": 9.98, "learning_rate": 3.0721966205837177e-08, "loss": 0.9314, "step": 13000 } ], "max_steps": 13020, "num_train_epochs": 10, "total_flos": 673236987330600.0, "trial_name": null, "trial_params": null }