{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9993906154783668, "eval_steps": 100, "global_step": 615, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 0.0004, "loss": 1.3292, "step": 100 }, { "epoch": 0.16, "eval_loss": 1.264318823814392, "eval_runtime": 84.6316, "eval_samples_per_second": 6.121, "eval_steps_per_second": 1.536, "step": 100 }, { "epoch": 0.33, "learning_rate": 0.00036392754225221053, "loss": 1.2472, "step": 200 }, { "epoch": 0.33, "eval_loss": 1.2486543655395508, "eval_runtime": 84.6693, "eval_samples_per_second": 6.118, "eval_steps_per_second": 1.535, "step": 200 }, { "epoch": 0.49, "learning_rate": 0.00026872239108850267, "loss": 1.2331, "step": 300 }, { "epoch": 0.49, "eval_loss": 1.2404521703720093, "eval_runtime": 84.6623, "eval_samples_per_second": 6.118, "eval_steps_per_second": 1.536, "step": 300 }, { "epoch": 0.65, "learning_rate": 0.00014872738443612402, "loss": 1.2188, "step": 400 }, { "epoch": 0.65, "eval_loss": 1.2340422868728638, "eval_runtime": 84.6101, "eval_samples_per_second": 6.122, "eval_steps_per_second": 1.536, "step": 400 }, { "epoch": 0.81, "learning_rate": 4.722767036921105e-05, "loss": 1.2173, "step": 500 }, { "epoch": 0.81, "eval_loss": 1.2294026613235474, "eval_runtime": 84.6649, "eval_samples_per_second": 6.118, "eval_steps_per_second": 1.535, "step": 500 }, { "epoch": 0.98, "learning_rate": 8.366903586781493e-07, "loss": 1.2025, "step": 600 }, { "epoch": 0.98, "eval_loss": 1.2282078266143799, "eval_runtime": 84.629, "eval_samples_per_second": 6.121, "eval_steps_per_second": 1.536, "step": 600 } ], "logging_steps": 100, "max_steps": 615, "num_train_epochs": 1, "save_steps": 100, "total_flos": 8.660077507682304e+16, "trial_name": null, "trial_params": null }