{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.1847133757961785, "eval_steps": 25, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 2.3797595190380762e-05, "loss": 1.5869, "step": 25 }, { "epoch": 0.16, "eval_loss": 1.343457579612732, "eval_runtime": 45.0202, "eval_samples_per_second": 6.775, "eval_steps_per_second": 0.444, "step": 25 }, { "epoch": 0.32, "learning_rate": 2.2545090180360722e-05, "loss": 0.998, "step": 50 }, { "epoch": 0.32, "eval_loss": 1.230006456375122, "eval_runtime": 45.0029, "eval_samples_per_second": 6.777, "eval_steps_per_second": 0.444, "step": 50 }, { "epoch": 0.48, "learning_rate": 2.1292585170340683e-05, "loss": 0.8627, "step": 75 }, { "epoch": 0.48, "eval_loss": 1.1211909055709839, "eval_runtime": 45.0426, "eval_samples_per_second": 6.771, "eval_steps_per_second": 0.444, "step": 75 }, { "epoch": 0.64, "learning_rate": 2.0040080160320643e-05, "loss": 0.8958, "step": 100 }, { "epoch": 0.64, "eval_loss": 1.1045145988464355, "eval_runtime": 45.2025, "eval_samples_per_second": 6.747, "eval_steps_per_second": 0.442, "step": 100 }, { "epoch": 0.8, "learning_rate": 1.87875751503006e-05, "loss": 0.8205, "step": 125 }, { "epoch": 0.8, "eval_loss": 1.0842006206512451, "eval_runtime": 45.2525, "eval_samples_per_second": 6.74, "eval_steps_per_second": 0.442, "step": 125 }, { "epoch": 0.96, "learning_rate": 1.7535070140280564e-05, "loss": 0.8141, "step": 150 }, { "epoch": 0.96, "eval_loss": 1.0894731283187866, "eval_runtime": 45.1805, "eval_samples_per_second": 6.751, "eval_steps_per_second": 0.443, "step": 150 }, { "epoch": 1.11, "learning_rate": 1.628256513026052e-05, "loss": 0.6384, "step": 175 }, { "epoch": 1.11, "eval_loss": 1.1109672784805298, "eval_runtime": 45.2193, "eval_samples_per_second": 6.745, "eval_steps_per_second": 0.442, "step": 175 }, { "epoch": 1.27, "learning_rate": 1.5030060120240483e-05, "loss": 0.5813, "step": 200 }, { "epoch": 1.27, "eval_loss": 1.1189604997634888, "eval_runtime": 45.2591, "eval_samples_per_second": 6.739, "eval_steps_per_second": 0.442, "step": 200 }, { "epoch": 1.43, "learning_rate": 1.3777555110220442e-05, "loss": 0.568, "step": 225 }, { "epoch": 1.43, "eval_loss": 1.1027684211730957, "eval_runtime": 45.2328, "eval_samples_per_second": 6.743, "eval_steps_per_second": 0.442, "step": 225 }, { "epoch": 1.59, "learning_rate": 1.25250501002004e-05, "loss": 0.5605, "step": 250 }, { "epoch": 1.59, "eval_loss": 1.1073968410491943, "eval_runtime": 45.4023, "eval_samples_per_second": 6.718, "eval_steps_per_second": 0.441, "step": 250 }, { "epoch": 1.75, "learning_rate": 1.1272545090180361e-05, "loss": 0.6342, "step": 275 }, { "epoch": 1.75, "eval_loss": 1.091808557510376, "eval_runtime": 45.3362, "eval_samples_per_second": 6.728, "eval_steps_per_second": 0.441, "step": 275 }, { "epoch": 1.91, "learning_rate": 1.0020040080160322e-05, "loss": 0.5254, "step": 300 }, { "epoch": 1.91, "eval_loss": 1.1066254377365112, "eval_runtime": 45.2929, "eval_samples_per_second": 6.734, "eval_steps_per_second": 0.442, "step": 300 }, { "epoch": 2.07, "learning_rate": 8.767535070140282e-06, "loss": 0.4553, "step": 325 }, { "epoch": 2.07, "eval_loss": 1.1803815364837646, "eval_runtime": 45.0968, "eval_samples_per_second": 6.763, "eval_steps_per_second": 0.443, "step": 325 }, { "epoch": 2.23, "learning_rate": 7.515030060120242e-06, "loss": 0.3712, "step": 350 }, { "epoch": 2.23, "eval_loss": 1.1674922704696655, "eval_runtime": 45.0974, "eval_samples_per_second": 6.763, "eval_steps_per_second": 0.443, "step": 350 }, { "epoch": 2.39, "learning_rate": 6.2625250501002e-06, "loss": 0.363, "step": 375 }, { "epoch": 2.39, "eval_loss": 1.180160403251648, "eval_runtime": 45.1473, "eval_samples_per_second": 6.756, "eval_steps_per_second": 0.443, "step": 375 }, { "epoch": 2.55, "learning_rate": 5.010020040080161e-06, "loss": 0.3732, "step": 400 }, { "epoch": 2.55, "eval_loss": 1.1637239456176758, "eval_runtime": 45.18, "eval_samples_per_second": 6.751, "eval_steps_per_second": 0.443, "step": 400 }, { "epoch": 2.71, "learning_rate": 3.757515030060121e-06, "loss": 0.4188, "step": 425 }, { "epoch": 2.71, "eval_loss": 1.1796648502349854, "eval_runtime": 45.1853, "eval_samples_per_second": 6.75, "eval_steps_per_second": 0.443, "step": 425 }, { "epoch": 2.87, "learning_rate": 2.5050100200400804e-06, "loss": 0.3395, "step": 450 }, { "epoch": 2.87, "eval_loss": 1.179069995880127, "eval_runtime": 45.3296, "eval_samples_per_second": 6.728, "eval_steps_per_second": 0.441, "step": 450 }, { "epoch": 3.03, "learning_rate": 1.2525050100200402e-06, "loss": 0.3736, "step": 475 }, { "epoch": 3.03, "eval_loss": 1.176107406616211, "eval_runtime": 45.2686, "eval_samples_per_second": 6.738, "eval_steps_per_second": 0.442, "step": 475 }, { "epoch": 3.18, "learning_rate": 0.0, "loss": 0.2538, "step": 500 }, { "epoch": 3.18, "eval_loss": 1.2055637836456299, "eval_runtime": 45.3063, "eval_samples_per_second": 6.732, "eval_steps_per_second": 0.441, "step": 500 } ], "logging_steps": 25, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "total_flos": 6.571164044781158e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }