{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.962406015037594, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3, "learning_rate": 6.25e-05, "loss": 10.8217, "step": 5 }, { "epoch": 0.6, "learning_rate": 9.995728791936504e-05, "loss": 10.8213, "step": 10 }, { "epoch": 0.9, "learning_rate": 9.947761466636014e-05, "loss": 10.8201, "step": 15 }, { "epoch": 0.96, "eval_loss": 10.819129943847656, "eval_runtime": 6.4186, "eval_samples_per_second": 69.174, "eval_steps_per_second": 17.294, "step": 16 }, { "epoch": 1.24, "learning_rate": 9.847001329696653e-05, "loss": 12.1716, "step": 20 }, { "epoch": 1.54, "learning_rate": 9.694523495787149e-05, "loss": 10.8181, "step": 25 }, { "epoch": 1.84, "learning_rate": 9.491954909459895e-05, "loss": 10.8167, "step": 30 }, { "epoch": 1.96, "eval_loss": 10.81454086303711, "eval_runtime": 6.3633, "eval_samples_per_second": 69.775, "eval_steps_per_second": 17.444, "step": 32 }, { "epoch": 2.18, "learning_rate": 9.241456985587868e-05, "loss": 12.1668, "step": 35 }, { "epoch": 2.48, "learning_rate": 8.945702546981969e-05, "loss": 10.8133, "step": 40 }, { "epoch": 2.78, "learning_rate": 8.60784730526531e-05, "loss": 10.8117, "step": 45 }, { "epoch": 2.96, "eval_loss": 10.809527397155762, "eval_runtime": 6.247, "eval_samples_per_second": 71.074, "eval_steps_per_second": 17.769, "step": 48 }, { "epoch": 3.12, "learning_rate": 8.231496189304704e-05, "loss": 12.1615, "step": 50 }, { "epoch": 3.42, "learning_rate": 7.820664880476256e-05, "loss": 10.8084, "step": 55 }, { "epoch": 3.72, "learning_rate": 7.379736965185368e-05, "loss": 10.8058, "step": 60 }, { "epoch": 3.96, "eval_loss": 10.802533149719238, "eval_runtime": 5.8936, "eval_samples_per_second": 75.335, "eval_steps_per_second": 18.834, "step": 64 }, { "epoch": 4.06, "learning_rate": 6.91341716182545e-05, "loss": 12.1538, "step": 65 }, { "epoch": 4.36, "learning_rate": 6.426681121245527e-05, "loss": 10.802, "step": 70 }, { "epoch": 4.66, "learning_rate": 5.924722336357793e-05, "loss": 10.8007, "step": 75 }, { "epoch": 4.96, "learning_rate": 5.4128967273616625e-05, "loss": 10.7997, "step": 80 }, { "epoch": 4.96, "eval_loss": 10.798870086669922, "eval_runtime": 6.2383, "eval_samples_per_second": 71.174, "eval_steps_per_second": 17.793, "step": 80 }, { "epoch": 5.3, "learning_rate": 4.8966654938622295e-05, "loss": 12.148, "step": 85 }, { "epoch": 5.6, "learning_rate": 4.381536843653262e-05, "loss": 10.7968, "step": 90 }, { "epoch": 5.9, "learning_rate": 3.87300721992097e-05, "loss": 10.7959, "step": 95 }, { "epoch": 5.96, "eval_loss": 10.794721603393555, "eval_runtime": 6.3176, "eval_samples_per_second": 70.28, "eval_steps_per_second": 17.57, "step": 96 }, { "epoch": 6.24, "learning_rate": 3.3765026539765834e-05, "loss": 12.1442, "step": 100 }, { "epoch": 6.54, "learning_rate": 2.8973208692864624e-05, "loss": 10.7943, "step": 105 }, { "epoch": 6.84, "learning_rate": 2.4405747545519963e-05, "loss": 10.7934, "step": 110 }, { "epoch": 6.96, "eval_loss": 10.792530059814453, "eval_runtime": 6.3054, "eval_samples_per_second": 70.416, "eval_steps_per_second": 17.604, "step": 112 }, { "epoch": 7.18, "learning_rate": 2.0111378089837956e-05, "loss": 12.1419, "step": 115 }, { "epoch": 7.48, "learning_rate": 1.6135921418712956e-05, "loss": 10.7926, "step": 120 }, { "epoch": 7.78, "learning_rate": 1.2521795812943704e-05, "loss": 10.7924, "step": 125 }, { "epoch": 7.96, "eval_loss": 10.791926383972168, "eval_runtime": 5.785, "eval_samples_per_second": 76.75, "eval_steps_per_second": 19.187, "step": 128 }, { "epoch": 8.12, "learning_rate": 9.307564136490254e-06, "loss": 12.1411, "step": 130 }, { "epoch": 8.42, "learning_rate": 6.527522369181655e-06, "loss": 10.7921, "step": 135 }, { "epoch": 8.72, "learning_rate": 4.2113336672471245e-06, "loss": 10.7921, "step": 140 }, { "epoch": 8.96, "eval_loss": 10.79179859161377, "eval_runtime": 6.2936, "eval_samples_per_second": 70.548, "eval_steps_per_second": 17.637, "step": 144 }, { "epoch": 9.06, "learning_rate": 2.3837118562592797e-06, "loss": 12.1411, "step": 145 }, { "epoch": 9.36, "learning_rate": 1.064157733632276e-06, "loss": 10.792, "step": 150 }, { "epoch": 9.66, "learning_rate": 2.667509943378721e-07, "loss": 10.792, "step": 155 }, { "epoch": 9.96, "learning_rate": 0.0, "loss": 10.792, "step": 160 }, { "epoch": 9.96, "eval_loss": 10.791767120361328, "eval_runtime": 5.8131, "eval_samples_per_second": 76.379, "eval_steps_per_second": 19.095, "step": 160 }, { "epoch": 9.96, "step": 160, "total_flos": 39596851200.0, "train_loss": 11.18234748840332, "train_runtime": 256.0921, "train_samples_per_second": 332.537, "train_steps_per_second": 0.625 } ], "max_steps": 160, "num_train_epochs": 10, "total_flos": 39596851200.0, "trial_name": null, "trial_params": null }