{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "grad_norm": 2.015625, "learning_rate": 0.0001, "loss": 2.9957, "step": 1 }, { "epoch": 1.0, "eval_loss": 3.1984338760375977, "eval_runtime": 0.465, "eval_samples_per_second": 21.504, "eval_steps_per_second": 2.15, "step": 2 }, { "epoch": 2.0, "eval_loss": 3.0672874450683594, "eval_runtime": 0.4711, "eval_samples_per_second": 21.225, "eval_steps_per_second": 2.122, "step": 4 }, { "epoch": 2.5, "grad_norm": 2.140625, "learning_rate": 0.00018660254037844388, "loss": 2.8525, "step": 5 }, { "epoch": 3.0, "eval_loss": 2.9477875232696533, "eval_runtime": 0.4781, "eval_samples_per_second": 20.915, "eval_steps_per_second": 2.091, "step": 6 }, { "epoch": 4.0, "eval_loss": 2.8806045055389404, "eval_runtime": 0.4817, "eval_samples_per_second": 20.76, "eval_steps_per_second": 2.076, "step": 8 }, { "epoch": 5.0, "grad_norm": 1.2734375, "learning_rate": 0.00011736481776669306, "loss": 2.3323, "step": 10 }, { "epoch": 5.0, "eval_loss": 2.83823823928833, "eval_runtime": 0.4837, "eval_samples_per_second": 20.676, "eval_steps_per_second": 2.068, "step": 10 }, { "epoch": 6.0, "eval_loss": 2.8050312995910645, "eval_runtime": 0.4968, "eval_samples_per_second": 20.128, "eval_steps_per_second": 2.013, "step": 12 }, { "epoch": 7.0, "eval_loss": 2.7636163234710693, "eval_runtime": 0.5049, "eval_samples_per_second": 19.805, "eval_steps_per_second": 1.98, "step": 14 }, { "epoch": 7.5, "grad_norm": 1.125, "learning_rate": 3.5721239031346066e-05, "loss": 2.0887, "step": 15 }, { "epoch": 8.0, "eval_loss": 2.7495064735412598, "eval_runtime": 0.5092, "eval_samples_per_second": 19.638, "eval_steps_per_second": 1.964, "step": 16 }, { "epoch": 9.0, "eval_loss": 2.743269681930542, "eval_runtime": 0.5168, "eval_samples_per_second": 19.35, "eval_steps_per_second": 1.935, "step": 18 }, { "epoch": 10.0, "grad_norm": 0.9375, "learning_rate": 0.0, "loss": 1.9997, "step": 20 }, { "epoch": 10.0, "eval_loss": 2.742553472518921, "eval_runtime": 0.5167, "eval_samples_per_second": 19.353, "eval_steps_per_second": 1.935, "step": 20 }, { "epoch": 10.0, "step": 20, "total_flos": 1.56632893882368e+16, "train_loss": 2.325469994544983, "train_runtime": 70.3098, "train_samples_per_second": 14.792, "train_steps_per_second": 0.284 } ], "logging_steps": 5, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "total_flos": 1.56632893882368e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }