{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2597631645250693, "eval_steps": 500, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 9.5e-05, "loss": 37.0618, "step": 500 }, { "epoch": 0.06, "eval_loss": 27.398916244506836, "eval_runtime": 3.1349, "eval_samples_per_second": 31.899, "eval_steps_per_second": 4.147, "step": 500 }, { "epoch": 0.13, "learning_rate": 9e-05, "loss": 34.5311, "step": 1000 }, { "epoch": 0.13, "eval_loss": 36.926673889160156, "eval_runtime": 3.1612, "eval_samples_per_second": 31.633, "eval_steps_per_second": 4.112, "step": 1000 }, { "epoch": 0.19, "learning_rate": 8.5e-05, "loss": 33.5869, "step": 1500 }, { "epoch": 0.19, "eval_loss": 31.76376724243164, "eval_runtime": 3.1795, "eval_samples_per_second": 31.452, "eval_steps_per_second": 4.089, "step": 1500 }, { "epoch": 0.25, "learning_rate": 8e-05, "loss": 34.2002, "step": 2000 }, { "epoch": 0.25, "eval_loss": 33.623844146728516, "eval_runtime": 3.1768, "eval_samples_per_second": 31.479, "eval_steps_per_second": 4.092, "step": 2000 }, { "epoch": 0.31, "learning_rate": 7.500000000000001e-05, "loss": 34.0832, "step": 2500 }, { "epoch": 0.31, "eval_loss": 27.767799377441406, "eval_runtime": 3.166, "eval_samples_per_second": 31.586, "eval_steps_per_second": 4.106, "step": 2500 }, { "epoch": 0.38, "learning_rate": 7e-05, "loss": 32.413, "step": 3000 }, { "epoch": 0.38, "eval_loss": 34.710330963134766, "eval_runtime": 3.1812, "eval_samples_per_second": 31.434, "eval_steps_per_second": 4.086, "step": 3000 }, { "epoch": 0.44, "learning_rate": 6.500000000000001e-05, "loss": 33.3394, "step": 3500 }, { "epoch": 0.44, "eval_loss": 29.091354370117188, "eval_runtime": 3.1601, "eval_samples_per_second": 31.645, "eval_steps_per_second": 4.114, "step": 3500 }, { "epoch": 0.5, "learning_rate": 6e-05, "loss": 32.6119, "step": 4000 }, { "epoch": 0.5, "eval_loss": 30.3073673248291, "eval_runtime": 3.1555, "eval_samples_per_second": 31.691, "eval_steps_per_second": 4.12, "step": 4000 }, { "epoch": 0.57, "learning_rate": 5.500000000000001e-05, "loss": 33.4702, "step": 4500 }, { "epoch": 0.57, "eval_loss": 30.2010555267334, "eval_runtime": 3.1576, "eval_samples_per_second": 31.67, "eval_steps_per_second": 4.117, "step": 4500 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 32.9516, "step": 5000 }, { "epoch": 0.63, "eval_loss": 28.628616333007812, "eval_runtime": 3.1678, "eval_samples_per_second": 31.568, "eval_steps_per_second": 4.104, "step": 5000 }, { "epoch": 0.69, "learning_rate": 4.5e-05, "loss": 32.9256, "step": 5500 }, { "epoch": 0.69, "eval_loss": 29.695850372314453, "eval_runtime": 3.1616, "eval_samples_per_second": 31.63, "eval_steps_per_second": 4.112, "step": 5500 }, { "epoch": 0.76, "learning_rate": 4e-05, "loss": 31.5253, "step": 6000 }, { "epoch": 0.76, "eval_loss": 24.25625228881836, "eval_runtime": 3.1405, "eval_samples_per_second": 31.842, "eval_steps_per_second": 4.14, "step": 6000 }, { "epoch": 0.82, "learning_rate": 3.5e-05, "loss": 34.0972, "step": 6500 }, { "epoch": 0.82, "eval_loss": 30.526784896850586, "eval_runtime": 3.1519, "eval_samples_per_second": 31.727, "eval_steps_per_second": 4.125, "step": 6500 }, { "epoch": 0.88, "learning_rate": 3e-05, "loss": 32.1422, "step": 7000 }, { "epoch": 0.88, "eval_loss": 29.38189697265625, "eval_runtime": 3.164, "eval_samples_per_second": 31.606, "eval_steps_per_second": 4.109, "step": 7000 }, { "epoch": 0.94, "learning_rate": 2.5e-05, "loss": 32.7259, "step": 7500 }, { "epoch": 0.94, "eval_loss": 31.17078399658203, "eval_runtime": 3.1747, "eval_samples_per_second": 31.499, "eval_steps_per_second": 4.095, "step": 7500 }, { "epoch": 1.01, "learning_rate": 2e-05, "loss": 31.0833, "step": 8000 }, { "epoch": 1.01, "eval_loss": 28.586397171020508, "eval_runtime": 3.1531, "eval_samples_per_second": 31.715, "eval_steps_per_second": 4.123, "step": 8000 }, { "epoch": 1.07, "learning_rate": 1.5e-05, "loss": 31.1692, "step": 8500 }, { "epoch": 1.07, "eval_loss": 32.3286247253418, "eval_runtime": 3.1659, "eval_samples_per_second": 31.586, "eval_steps_per_second": 4.106, "step": 8500 }, { "epoch": 1.13, "learning_rate": 1e-05, "loss": 31.318, "step": 9000 }, { "epoch": 1.13, "eval_loss": 30.532176971435547, "eval_runtime": 3.1657, "eval_samples_per_second": 31.588, "eval_steps_per_second": 4.106, "step": 9000 }, { "epoch": 1.2, "learning_rate": 5e-06, "loss": 30.6785, "step": 9500 }, { "epoch": 1.2, "eval_loss": 29.415210723876953, "eval_runtime": 3.1501, "eval_samples_per_second": 31.745, "eval_steps_per_second": 4.127, "step": 9500 }, { "epoch": 1.26, "learning_rate": 0.0, "loss": 31.4601, "step": 10000 }, { "epoch": 1.26, "eval_loss": 31.739580154418945, "eval_runtime": 3.1633, "eval_samples_per_second": 31.612, "eval_steps_per_second": 4.11, "step": 10000 } ], "logging_steps": 500, "max_steps": 10000, "num_train_epochs": 2, "save_steps": 5000, "total_flos": 1.3694633480945664e+17, "trial_name": null, "trial_params": null }