{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8444444444444446, "eval_steps": 60, "global_step": 960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 0.0002, "loss": 1.293, "step": 60 }, { "epoch": 0.18, "eval_runtime": 313.4076, "eval_samples_per_second": 1.914, "eval_steps_per_second": 0.239, "step": 60 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 0.6736, "step": 120 }, { "epoch": 0.36, "eval_runtime": 313.3616, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.239, "step": 120 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.6332, "step": 180 }, { "epoch": 0.53, "eval_runtime": 313.7062, "eval_samples_per_second": 1.913, "eval_steps_per_second": 0.239, "step": 180 }, { "epoch": 0.71, "learning_rate": 0.0002, "loss": 0.6116, "step": 240 }, { "epoch": 0.71, "eval_runtime": 313.0801, "eval_samples_per_second": 1.916, "eval_steps_per_second": 0.24, "step": 240 }, { "epoch": 0.89, "learning_rate": 0.0002, "loss": 0.633, "step": 300 }, { "epoch": 0.89, "eval_runtime": 312.6613, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.24, "step": 300 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.5895, "step": 360 }, { "epoch": 1.07, "eval_runtime": 313.3051, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.239, "step": 360 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 0.5845, "step": 420 }, { "epoch": 1.24, "eval_runtime": 313.2319, "eval_samples_per_second": 1.916, "eval_steps_per_second": 0.239, "step": 420 }, { "epoch": 1.42, "learning_rate": 0.0002, "loss": 0.5876, "step": 480 }, { "epoch": 1.42, "eval_runtime": 313.3956, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.239, "step": 480 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.569, "step": 540 }, { "epoch": 1.6, "eval_runtime": 313.3538, "eval_samples_per_second": 1.915, "eval_steps_per_second": 0.239, "step": 540 }, { "epoch": 1.78, "learning_rate": 0.0002, "loss": 0.5487, "step": 600 }, { "epoch": 1.78, "eval_runtime": 312.5543, "eval_samples_per_second": 1.92, "eval_steps_per_second": 0.24, "step": 600 }, { "epoch": 1.96, "learning_rate": 0.0002, "loss": 0.5531, "step": 660 }, { "epoch": 1.96, "eval_runtime": 312.6322, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.24, "step": 660 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.5315, "step": 720 }, { "epoch": 2.13, "eval_runtime": 312.3646, "eval_samples_per_second": 1.921, "eval_steps_per_second": 0.24, "step": 720 }, { "epoch": 2.31, "learning_rate": 0.0002, "loss": 0.5064, "step": 780 }, { "epoch": 2.31, "eval_runtime": 312.4619, "eval_samples_per_second": 1.92, "eval_steps_per_second": 0.24, "step": 780 }, { "epoch": 2.49, "learning_rate": 0.0002, "loss": 0.5125, "step": 840 }, { "epoch": 2.49, "eval_runtime": 312.7044, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.24, "step": 840 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.476, "step": 900 }, { "epoch": 2.67, "eval_runtime": 312.5341, "eval_samples_per_second": 1.92, "eval_steps_per_second": 0.24, "step": 900 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 0.5142, "step": 960 }, { "epoch": 2.84, "eval_runtime": 312.6866, "eval_samples_per_second": 1.919, "eval_steps_per_second": 0.24, "step": 960 } ], "logging_steps": 60, "max_steps": 1011, "num_train_epochs": 3, "save_steps": 60, "total_flos": 1.5945703889043456e+17, "trial_name": null, "trial_params": null }