{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.948148148148148, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3, "learning_rate": 6.25e-05, "loss": 4.423, "step": 5 }, { "epoch": 0.59, "learning_rate": 9.995728791936504e-05, "loss": 3.358, "step": 10 }, { "epoch": 0.89, "learning_rate": 9.947761466636014e-05, "loss": 2.8568, "step": 15 }, { "epoch": 0.95, "eval_loss": 2.593674421310425, "eval_runtime": 7.3362, "eval_samples_per_second": 61.613, "eval_steps_per_second": 15.403, "step": 16 }, { "epoch": 1.24, "learning_rate": 9.847001329696653e-05, "loss": 3.0543, "step": 20 }, { "epoch": 1.53, "learning_rate": 9.694523495787149e-05, "loss": 2.3947, "step": 25 }, { "epoch": 1.83, "learning_rate": 9.491954909459895e-05, "loss": 2.2487, "step": 30 }, { "epoch": 1.95, "eval_loss": 2.1050145626068115, "eval_runtime": 7.5204, "eval_samples_per_second": 60.104, "eval_steps_per_second": 15.026, "step": 32 }, { "epoch": 2.18, "learning_rate": 9.241456985587868e-05, "loss": 2.4987, "step": 35 }, { "epoch": 2.47, "learning_rate": 8.945702546981969e-05, "loss": 2.0039, "step": 40 }, { "epoch": 2.77, "learning_rate": 8.60784730526531e-05, "loss": 1.9011, "step": 45 }, { "epoch": 2.95, "eval_loss": 1.80819571018219, "eval_runtime": 7.4002, "eval_samples_per_second": 61.079, "eval_steps_per_second": 15.27, "step": 48 }, { "epoch": 3.12, "learning_rate": 8.231496189304704e-05, "loss": 2.1532, "step": 50 }, { "epoch": 3.41, "learning_rate": 7.820664880476256e-05, "loss": 1.7384, "step": 55 }, { "epoch": 3.71, "learning_rate": 7.379736965185368e-05, "loss": 1.6837, "step": 60 }, { "epoch": 3.95, "eval_loss": 1.6178017854690552, "eval_runtime": 7.6447, "eval_samples_per_second": 59.126, "eval_steps_per_second": 14.781, "step": 64 }, { "epoch": 4.06, "learning_rate": 6.91341716182545e-05, "loss": 1.9079, "step": 65 }, { "epoch": 4.36, "learning_rate": 6.426681121245527e-05, "loss": 1.5592, "step": 70 }, { "epoch": 4.65, "learning_rate": 5.924722336357793e-05, "loss": 1.522, "step": 75 }, { "epoch": 4.95, "learning_rate": 5.4128967273616625e-05, "loss": 1.4887, "step": 80 }, { "epoch": 4.95, "eval_loss": 1.4896912574768066, "eval_runtime": 7.3322, "eval_samples_per_second": 61.646, "eval_steps_per_second": 15.411, "step": 80 }, { "epoch": 5.3, "learning_rate": 4.8966654938622295e-05, "loss": 1.6886, "step": 85 }, { "epoch": 5.59, "learning_rate": 4.381536843653262e-05, "loss": 1.4043, "step": 90 }, { "epoch": 5.89, "learning_rate": 3.87300721992097e-05, "loss": 1.3812, "step": 95 }, { "epoch": 5.95, "eval_loss": 1.4017213582992554, "eval_runtime": 7.2083, "eval_samples_per_second": 62.706, "eval_steps_per_second": 15.676, "step": 96 }, { "epoch": 6.24, "learning_rate": 3.3765026539765834e-05, "loss": 1.5807, "step": 100 }, { "epoch": 6.53, "learning_rate": 2.8973208692864624e-05, "loss": 1.3233, "step": 105 }, { "epoch": 6.83, "learning_rate": 2.4405747545519963e-05, "loss": 1.2944, "step": 110 }, { "epoch": 6.95, "eval_loss": 1.3437390327453613, "eval_runtime": 7.1152, "eval_samples_per_second": 63.526, "eval_steps_per_second": 15.882, "step": 112 }, { "epoch": 7.18, "learning_rate": 2.0111378089837956e-05, "loss": 1.5042, "step": 115 }, { "epoch": 7.47, "learning_rate": 1.6135921418712956e-05, "loss": 1.2607, "step": 120 }, { "epoch": 7.77, "learning_rate": 1.2521795812943704e-05, "loss": 1.2574, "step": 125 }, { "epoch": 7.95, "eval_loss": 1.3126752376556396, "eval_runtime": 7.3998, "eval_samples_per_second": 61.083, "eval_steps_per_second": 15.271, "step": 128 }, { "epoch": 8.12, "learning_rate": 9.307564136490254e-06, "loss": 1.4587, "step": 130 }, { "epoch": 8.41, "learning_rate": 6.527522369181655e-06, "loss": 1.2245, "step": 135 }, { "epoch": 8.71, "learning_rate": 4.2113336672471245e-06, "loss": 1.2325, "step": 140 }, { "epoch": 8.95, "eval_loss": 1.3008612394332886, "eval_runtime": 7.4909, "eval_samples_per_second": 60.34, "eval_steps_per_second": 15.085, "step": 144 }, { "epoch": 9.06, "learning_rate": 2.3837118562592797e-06, "loss": 1.4374, "step": 145 }, { "epoch": 9.36, "learning_rate": 1.064157733632276e-06, "loss": 1.2213, "step": 150 }, { "epoch": 9.65, "learning_rate": 2.667509943378721e-07, "loss": 1.2213, "step": 155 }, { "epoch": 9.95, "learning_rate": 0.0, "loss": 1.2223, "step": 160 }, { "epoch": 9.95, "eval_loss": 1.2986704111099243, "eval_runtime": 7.4403, "eval_samples_per_second": 60.751, "eval_steps_per_second": 15.188, "step": 160 }, { "epoch": 9.95, "step": 160, "total_flos": 8.010001210461389e+16, "train_loss": 1.8282876014709473, "train_runtime": 2056.929, "train_samples_per_second": 42.068, "train_steps_per_second": 0.078 } ], "max_steps": 160, "num_train_epochs": 10, "total_flos": 8.010001210461389e+16, "trial_name": null, "trial_params": null }