{ "best_metric": 1.1251049041748047, "best_model_checkpoint": "./mbartLarge_koja_mid2_run1/checkpoint-22708", "epoch": 2.0, "eval_steps": 500, "global_step": 22708, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.977981328166285e-05, "loss": 1.9241, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.95596265633257e-05, "loss": 1.6494, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.933943984498855e-05, "loss": 1.5434, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.91192531266514e-05, "loss": 1.4836, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.889906640831425e-05, "loss": 1.4499, "step": 2500 }, { "epoch": 0.26, "learning_rate": 4.86788796899771e-05, "loss": 1.3934, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.845869297163995e-05, "loss": 1.3857, "step": 3500 }, { "epoch": 0.35, "learning_rate": 4.82385062533028e-05, "loss": 1.3396, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.801831953496565e-05, "loss": 1.3322, "step": 4500 }, { "epoch": 0.44, "learning_rate": 4.77981328166285e-05, "loss": 1.3106, "step": 5000 }, { "epoch": 0.48, "learning_rate": 4.757794609829135e-05, "loss": 1.2924, "step": 5500 }, { "epoch": 0.53, "learning_rate": 4.73577593799542e-05, "loss": 1.2834, "step": 6000 }, { "epoch": 0.57, "learning_rate": 4.713757266161705e-05, "loss": 1.2791, "step": 6500 }, { "epoch": 0.62, "learning_rate": 4.69173859432799e-05, "loss": 1.255, "step": 7000 }, { "epoch": 0.66, "learning_rate": 4.669719922494275e-05, "loss": 1.2573, "step": 7500 }, { "epoch": 0.7, "learning_rate": 4.64770125066056e-05, "loss": 1.2382, "step": 8000 }, { "epoch": 0.75, "learning_rate": 4.625682578826845e-05, "loss": 1.223, "step": 8500 }, { "epoch": 0.79, "learning_rate": 4.6036639069931303e-05, "loss": 1.2145, "step": 9000 }, { "epoch": 0.84, "learning_rate": 4.5816452351594153e-05, "loss": 1.219, "step": 9500 }, { "epoch": 0.88, "learning_rate": 4.5596265633257004e-05, "loss": 1.2125, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.5376078914919854e-05, "loss": 1.1996, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.5155892196582704e-05, "loss": 1.1823, "step": 11000 }, { "epoch": 1.0, "eval_bleu": 29.4501, "eval_gen_len": 18.8118, "eval_loss": 1.1695398092269897, "eval_runtime": 1716.1373, "eval_samples_per_second": 13.231, "eval_steps_per_second": 0.827, "step": 11354 }, { "epoch": 1.01, "learning_rate": 4.4935705478245554e-05, "loss": 1.1596, "step": 11500 }, { "epoch": 1.06, "learning_rate": 4.4715518759908404e-05, "loss": 1.0777, "step": 12000 }, { "epoch": 1.1, "learning_rate": 4.4495332041571254e-05, "loss": 1.0658, "step": 12500 }, { "epoch": 1.14, "learning_rate": 4.427514532323411e-05, "loss": 1.0296, "step": 13000 }, { "epoch": 1.19, "learning_rate": 4.4054958604896954e-05, "loss": 1.0269, "step": 13500 }, { "epoch": 1.23, "learning_rate": 4.3834771886559804e-05, "loss": 1.0192, "step": 14000 }, { "epoch": 1.28, "learning_rate": 4.3614585168222654e-05, "loss": 0.9946, "step": 14500 }, { "epoch": 1.32, "learning_rate": 4.3394398449885504e-05, "loss": 0.9996, "step": 15000 }, { "epoch": 1.37, "learning_rate": 4.3174211731548354e-05, "loss": 0.975, "step": 15500 }, { "epoch": 1.41, "learning_rate": 4.295402501321121e-05, "loss": 0.9874, "step": 16000 }, { "epoch": 1.45, "learning_rate": 4.2733838294874054e-05, "loss": 0.9683, "step": 16500 }, { "epoch": 1.5, "learning_rate": 4.2513651576536904e-05, "loss": 0.9675, "step": 17000 }, { "epoch": 1.54, "learning_rate": 4.2293464858199754e-05, "loss": 0.969, "step": 17500 }, { "epoch": 1.59, "learning_rate": 4.2073278139862604e-05, "loss": 0.9548, "step": 18000 }, { "epoch": 1.63, "learning_rate": 4.1853091421525454e-05, "loss": 0.9563, "step": 18500 }, { "epoch": 1.67, "learning_rate": 4.163290470318831e-05, "loss": 0.9536, "step": 19000 }, { "epoch": 1.72, "learning_rate": 4.1412717984851155e-05, "loss": 0.941, "step": 19500 }, { "epoch": 1.76, "learning_rate": 4.1192531266514005e-05, "loss": 0.9285, "step": 20000 }, { "epoch": 1.81, "learning_rate": 4.0972344548176855e-05, "loss": 0.9303, "step": 20500 }, { "epoch": 1.85, "learning_rate": 4.0752157829839705e-05, "loss": 0.9428, "step": 21000 }, { "epoch": 1.89, "learning_rate": 4.0531971111502555e-05, "loss": 0.9359, "step": 21500 }, { "epoch": 1.94, "learning_rate": 4.031178439316541e-05, "loss": 0.9302, "step": 22000 }, { "epoch": 1.98, "learning_rate": 4.0091597674828255e-05, "loss": 0.9207, "step": 22500 }, { "epoch": 2.0, "eval_bleu": 30.842, "eval_gen_len": 18.0892, "eval_loss": 1.1251049041748047, "eval_runtime": 1535.7063, "eval_samples_per_second": 14.786, "eval_steps_per_second": 0.925, "step": 22708 } ], "logging_steps": 500, "max_steps": 113540, "num_train_epochs": 10, "save_steps": 500, "total_flos": 7.874829468219474e+17, "trial_name": null, "trial_params": null }