{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.109542245502164, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.73, "eval_loss": 0.04153425619006157, "eval_runtime": 134.1421, "eval_samples_per_second": 18.615, "eval_steps_per_second": 1.17, "step": 200 }, { "epoch": 1.46, "eval_loss": 0.03776169568300247, "eval_runtime": 133.8056, "eval_samples_per_second": 18.661, "eval_steps_per_second": 1.173, "step": 400 }, { "epoch": 1.82, "learning_rate": 8.60545524394929e-05, "loss": 1.1974, "step": 500 }, { "epoch": 2.19, "eval_loss": 0.038170840591192245, "eval_runtime": 133.7976, "eval_samples_per_second": 18.663, "eval_steps_per_second": 1.173, "step": 600 }, { "epoch": 2.92, "eval_loss": 0.03970686346292496, "eval_runtime": 133.7983, "eval_samples_per_second": 18.662, "eval_steps_per_second": 1.173, "step": 800 }, { "epoch": 3.64, "learning_rate": 6.684594698424894e-05, "loss": 0.0176, "step": 1000 }, { "epoch": 3.64, "eval_loss": 0.040207117795944214, "eval_runtime": 133.8022, "eval_samples_per_second": 18.662, "eval_steps_per_second": 1.173, "step": 1000 }, { "epoch": 4.37, "eval_loss": 0.04497513547539711, "eval_runtime": 133.8142, "eval_samples_per_second": 18.66, "eval_steps_per_second": 1.173, "step": 1200 }, { "epoch": 5.1, "eval_loss": 0.04816456884145737, "eval_runtime": 133.7956, "eval_samples_per_second": 18.663, "eval_steps_per_second": 1.173, "step": 1400 }, { "epoch": 5.47, "learning_rate": 4.7637341529005e-05, "loss": 0.0096, "step": 1500 }, { "epoch": 5.83, "eval_loss": 0.04493580386042595, "eval_runtime": 133.8039, "eval_samples_per_second": 18.662, "eval_steps_per_second": 1.173, "step": 1600 }, { "epoch": 6.56, "eval_loss": 0.04887638986110687, "eval_runtime": 133.8071, "eval_samples_per_second": 18.661, "eval_steps_per_second": 1.173, "step": 1800 }, { "epoch": 7.29, "learning_rate": 2.8428736073761048e-05, "loss": 0.0049, "step": 2000 }, { "epoch": 7.29, "eval_loss": 0.05439848080277443, "eval_runtime": 133.7974, "eval_samples_per_second": 18.663, "eval_steps_per_second": 1.173, "step": 2000 }, { "epoch": 8.02, "eval_loss": 0.055828921496868134, "eval_runtime": 133.8004, "eval_samples_per_second": 18.662, "eval_steps_per_second": 1.173, "step": 2200 }, { "epoch": 8.75, "eval_loss": 0.056505098938941956, "eval_runtime": 133.7919, "eval_samples_per_second": 18.663, "eval_steps_per_second": 1.173, "step": 2400 }, { "epoch": 9.11, "learning_rate": 9.220130618517096e-06, "loss": 0.0028, "step": 2500 } ], "max_steps": 2740, "num_train_epochs": 10, "total_flos": 1.9485556373716992e+17, "trial_name": null, "trial_params": null }