{ "best_metric": 0.34489724040031433, "best_model_checkpoint": "./checkpoint-my/checkpoint-1500", "epoch": 7.56789812129618, "eval_steps": 500, "global_step": 7500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.66, "learning_rate": 1.7375328083989503e-05, "loss": 4.7871, "step": 500 }, { "epoch": 0.66, "eval_bleu": 26.4661, "eval_gen_len": 31.9891, "eval_loss": 1.3954318761825562, "eval_runtime": 189.1048, "eval_samples_per_second": 5.352, "eval_steps_per_second": 1.338, "step": 500 }, { "epoch": 1.31, "learning_rate": 1.4750656167979002e-05, "loss": 0.3945, "step": 1000 }, { "epoch": 1.31, "eval_bleu": 27.1091, "eval_gen_len": 32.2134, "eval_loss": 0.34945473074913025, "eval_runtime": 187.8434, "eval_samples_per_second": 5.387, "eval_steps_per_second": 1.347, "step": 1000 }, { "epoch": 1.97, "learning_rate": 1.2125984251968505e-05, "loss": 0.145, "step": 1500 }, { "epoch": 1.97, "eval_bleu": 27.3119, "eval_gen_len": 32.2885, "eval_loss": 0.34489724040031433, "eval_runtime": 188.4135, "eval_samples_per_second": 5.371, "eval_steps_per_second": 1.343, "step": 1500 }, { "epoch": 2.62, "learning_rate": 1.650043744531934e-05, "loss": 0.3623, "step": 2000 }, { "epoch": 2.62, "eval_bleu": 3.0191, "eval_gen_len": 46.4427, "eval_loss": 0.7403104901313782, "eval_runtime": 245.2236, "eval_samples_per_second": 4.127, "eval_steps_per_second": 1.032, "step": 2000 }, { "epoch": 3.28, "learning_rate": 1.562554680664917e-05, "loss": 0.3224, "step": 2500 }, { "epoch": 3.28, "eval_bleu": 3.0664, "eval_gen_len": 46.9269, "eval_loss": 0.7314952611923218, "eval_runtime": 247.2109, "eval_samples_per_second": 4.094, "eval_steps_per_second": 1.023, "step": 2500 }, { "epoch": 3.94, "learning_rate": 1.4750656167979002e-05, "loss": 0.3066, "step": 3000 }, { "epoch": 3.94, "eval_bleu": 2.9075, "eval_gen_len": 47.915, "eval_loss": 0.726075291633606, "eval_runtime": 280.6593, "eval_samples_per_second": 3.606, "eval_steps_per_second": 0.901, "step": 3000 }, { "epoch": 4.59, "learning_rate": 1.3875765529308838e-05, "loss": 0.2952, "step": 3500 }, { "epoch": 4.59, "eval_bleu": 2.9214, "eval_gen_len": 47.5, "eval_loss": 0.7223652005195618, "eval_runtime": 268.7931, "eval_samples_per_second": 3.765, "eval_steps_per_second": 0.941, "step": 3500 }, { "epoch": 5.25, "learning_rate": 1.3000874890638671e-05, "loss": 0.2859, "step": 4000 }, { "epoch": 5.25, "eval_bleu": 3.0026, "eval_gen_len": 47.8577, "eval_loss": 0.7183992266654968, "eval_runtime": 271.7722, "eval_samples_per_second": 3.724, "eval_steps_per_second": 0.931, "step": 4000 }, { "epoch": 4.61, "learning_rate": 1.3846153846153847e-05, "loss": 0.1257, "step": 4500 }, { "epoch": 4.61, "eval_bleu": 27.2567, "eval_gen_len": 32.3172, "eval_loss": 0.3456554710865021, "eval_runtime": 190.3961, "eval_samples_per_second": 5.315, "eval_steps_per_second": 1.329, "step": 4500 }, { "epoch": 5.13, "learning_rate": 1.3162393162393164e-05, "loss": 0.1218, "step": 5000 }, { "epoch": 5.13, "eval_bleu": 27.4171, "eval_gen_len": 32.2915, "eval_loss": 0.3452778160572052, "eval_runtime": 193.9304, "eval_samples_per_second": 5.218, "eval_steps_per_second": 1.305, "step": 5000 }, { "epoch": 5.64, "learning_rate": 1.247863247863248e-05, "loss": 0.1189, "step": 5500 }, { "epoch": 5.64, "eval_bleu": 27.4615, "eval_gen_len": 32.2204, "eval_loss": 0.34533679485321045, "eval_runtime": 186.2569, "eval_samples_per_second": 5.433, "eval_steps_per_second": 1.358, "step": 5500 }, { "epoch": 6.15, "learning_rate": 1.1794871794871796e-05, "loss": 0.1166, "step": 6000 }, { "epoch": 6.15, "eval_bleu": 27.6321, "eval_gen_len": 32.2816, "eval_loss": 0.34552034735679626, "eval_runtime": 185.1476, "eval_samples_per_second": 5.466, "eval_steps_per_second": 1.366, "step": 6000 }, { "epoch": 6.56, "learning_rate": 1.125462495795493e-05, "loss": 0.1171, "step": 6500 }, { "epoch": 6.56, "eval_bleu": 27.3986, "eval_gen_len": 32.4397, "eval_loss": 0.34709280729293823, "eval_runtime": 204.1804, "eval_samples_per_second": 4.956, "eval_steps_per_second": 1.239, "step": 6500 }, { "epoch": 7.06, "learning_rate": 1.0581903800874538e-05, "loss": 0.1157, "step": 7000 }, { "epoch": 7.06, "eval_bleu": 27.5083, "eval_gen_len": 32.2816, "eval_loss": 0.34769660234451294, "eval_runtime": 209.4596, "eval_samples_per_second": 4.831, "eval_steps_per_second": 1.208, "step": 7000 }, { "epoch": 7.57, "learning_rate": 9.909182643794148e-06, "loss": 0.1121, "step": 7500 }, { "epoch": 7.57, "eval_bleu": 27.4437, "eval_gen_len": 32.2194, "eval_loss": 0.34789395332336426, "eval_runtime": 209.9641, "eval_samples_per_second": 4.82, "eval_steps_per_second": 1.205, "step": 7500 } ], "logging_steps": 500, "max_steps": 14865, "num_train_epochs": 15, "save_steps": 500, "total_flos": 5.2005742820563354e+17, "trial_name": null, "trial_params": null }