{ "best_metric": 1.1251049041748047, "best_model_checkpoint": "./mbartLarge_koja_mid2_run1/checkpoint-22708", "epoch": 6.0, "eval_steps": 500, "global_step": 68124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.977981328166285e-05, "loss": 1.9241, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.95596265633257e-05, "loss": 1.6494, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.933943984498855e-05, "loss": 1.5434, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.91192531266514e-05, "loss": 1.4836, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.889906640831425e-05, "loss": 1.4499, "step": 2500 }, { "epoch": 0.26, "learning_rate": 4.86788796899771e-05, "loss": 1.3934, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.845869297163995e-05, "loss": 1.3857, "step": 3500 }, { "epoch": 0.35, "learning_rate": 4.82385062533028e-05, "loss": 1.3396, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.801831953496565e-05, "loss": 1.3322, "step": 4500 }, { "epoch": 0.44, "learning_rate": 4.77981328166285e-05, "loss": 1.3106, "step": 5000 }, { "epoch": 0.48, "learning_rate": 4.757794609829135e-05, "loss": 1.2924, "step": 5500 }, { "epoch": 0.53, "learning_rate": 4.73577593799542e-05, "loss": 1.2834, "step": 6000 }, { "epoch": 0.57, "learning_rate": 4.713757266161705e-05, "loss": 1.2791, "step": 6500 }, { "epoch": 0.62, "learning_rate": 4.69173859432799e-05, "loss": 1.255, "step": 7000 }, { "epoch": 0.66, "learning_rate": 4.669719922494275e-05, "loss": 1.2573, "step": 7500 }, { "epoch": 0.7, "learning_rate": 4.64770125066056e-05, "loss": 1.2382, "step": 8000 }, { "epoch": 0.75, "learning_rate": 4.625682578826845e-05, "loss": 1.223, "step": 8500 }, { "epoch": 0.79, "learning_rate": 4.6036639069931303e-05, "loss": 1.2145, "step": 9000 }, { "epoch": 0.84, "learning_rate": 4.5816452351594153e-05, "loss": 1.219, "step": 9500 }, { "epoch": 0.88, "learning_rate": 4.5596265633257004e-05, "loss": 1.2125, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.5376078914919854e-05, "loss": 1.1996, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.5155892196582704e-05, "loss": 1.1823, "step": 11000 }, { "epoch": 1.0, "eval_bleu": 29.4501, "eval_gen_len": 18.8118, "eval_loss": 1.1695398092269897, "eval_runtime": 1716.1373, "eval_samples_per_second": 13.231, "eval_steps_per_second": 0.827, "step": 11354 }, { "epoch": 1.01, "learning_rate": 4.4935705478245554e-05, "loss": 1.1596, "step": 11500 }, { "epoch": 1.06, "learning_rate": 4.4715518759908404e-05, "loss": 1.0777, "step": 12000 }, { "epoch": 1.1, "learning_rate": 4.4495332041571254e-05, "loss": 1.0658, "step": 12500 }, { "epoch": 1.14, "learning_rate": 4.427514532323411e-05, "loss": 1.0296, "step": 13000 }, { "epoch": 1.19, "learning_rate": 4.4054958604896954e-05, "loss": 1.0269, "step": 13500 }, { "epoch": 1.23, "learning_rate": 4.3834771886559804e-05, "loss": 1.0192, "step": 14000 }, { "epoch": 1.28, "learning_rate": 4.3614585168222654e-05, "loss": 0.9946, "step": 14500 }, { "epoch": 1.32, "learning_rate": 4.3394398449885504e-05, "loss": 0.9996, "step": 15000 }, { "epoch": 1.37, "learning_rate": 4.3174211731548354e-05, "loss": 0.975, "step": 15500 }, { "epoch": 1.41, "learning_rate": 4.295402501321121e-05, "loss": 0.9874, "step": 16000 }, { "epoch": 1.45, "learning_rate": 4.2733838294874054e-05, "loss": 0.9683, "step": 16500 }, { "epoch": 1.5, "learning_rate": 4.2513651576536904e-05, "loss": 0.9675, "step": 17000 }, { "epoch": 1.54, "learning_rate": 4.2293464858199754e-05, "loss": 0.969, "step": 17500 }, { "epoch": 1.59, "learning_rate": 4.2073278139862604e-05, "loss": 0.9548, "step": 18000 }, { "epoch": 1.63, "learning_rate": 4.1853091421525454e-05, "loss": 0.9563, "step": 18500 }, { "epoch": 1.67, "learning_rate": 4.163290470318831e-05, "loss": 0.9536, "step": 19000 }, { "epoch": 1.72, "learning_rate": 4.1412717984851155e-05, "loss": 0.941, "step": 19500 }, { "epoch": 1.76, "learning_rate": 4.1192531266514005e-05, "loss": 0.9285, "step": 20000 }, { "epoch": 1.81, "learning_rate": 4.0972344548176855e-05, "loss": 0.9303, "step": 20500 }, { "epoch": 1.85, "learning_rate": 4.0752157829839705e-05, "loss": 0.9428, "step": 21000 }, { "epoch": 1.89, "learning_rate": 4.0531971111502555e-05, "loss": 0.9359, "step": 21500 }, { "epoch": 1.94, "learning_rate": 4.031178439316541e-05, "loss": 0.9302, "step": 22000 }, { "epoch": 1.98, "learning_rate": 4.0091597674828255e-05, "loss": 0.9207, "step": 22500 }, { "epoch": 2.0, "eval_bleu": 30.842, "eval_gen_len": 18.0892, "eval_loss": 1.1251049041748047, "eval_runtime": 1535.7063, "eval_samples_per_second": 14.786, "eval_steps_per_second": 0.925, "step": 22708 }, { "epoch": 2.03, "learning_rate": 3.9871410956491105e-05, "loss": 0.8788, "step": 23000 }, { "epoch": 2.07, "learning_rate": 3.9651224238153955e-05, "loss": 0.8367, "step": 23500 }, { "epoch": 2.11, "learning_rate": 3.9431037519816805e-05, "loss": 0.8196, "step": 24000 }, { "epoch": 2.16, "learning_rate": 3.921085080147966e-05, "loss": 0.7967, "step": 24500 }, { "epoch": 2.2, "learning_rate": 3.8990664083142505e-05, "loss": 0.7956, "step": 25000 }, { "epoch": 2.25, "learning_rate": 3.8770477364805355e-05, "loss": 0.7843, "step": 25500 }, { "epoch": 2.29, "learning_rate": 3.8550290646468205e-05, "loss": 0.7767, "step": 26000 }, { "epoch": 2.33, "learning_rate": 3.8330103928131055e-05, "loss": 0.7668, "step": 26500 }, { "epoch": 2.38, "learning_rate": 3.8109917209793905e-05, "loss": 0.754, "step": 27000 }, { "epoch": 2.42, "learning_rate": 3.788973049145676e-05, "loss": 0.7671, "step": 27500 }, { "epoch": 2.47, "learning_rate": 3.7669543773119605e-05, "loss": 0.7569, "step": 28000 }, { "epoch": 2.51, "learning_rate": 3.7449357054782456e-05, "loss": 0.7553, "step": 28500 }, { "epoch": 2.55, "learning_rate": 3.7229170336445306e-05, "loss": 0.7526, "step": 29000 }, { "epoch": 2.6, "learning_rate": 3.7008983618108156e-05, "loss": 0.7427, "step": 29500 }, { "epoch": 2.64, "learning_rate": 3.6788796899771006e-05, "loss": 0.7407, "step": 30000 }, { "epoch": 2.69, "learning_rate": 3.656861018143386e-05, "loss": 0.7418, "step": 30500 }, { "epoch": 2.73, "learning_rate": 3.6348423463096706e-05, "loss": 0.7301, "step": 31000 }, { "epoch": 2.77, "learning_rate": 3.6128236744759556e-05, "loss": 0.7218, "step": 31500 }, { "epoch": 2.82, "learning_rate": 3.5908050026422406e-05, "loss": 0.7232, "step": 32000 }, { "epoch": 2.86, "learning_rate": 3.5687863308085256e-05, "loss": 0.7375, "step": 32500 }, { "epoch": 2.91, "learning_rate": 3.5467676589748106e-05, "loss": 0.7261, "step": 33000 }, { "epoch": 2.95, "learning_rate": 3.524748987141096e-05, "loss": 0.7197, "step": 33500 }, { "epoch": 2.99, "learning_rate": 3.5027303153073806e-05, "loss": 0.7127, "step": 34000 }, { "epoch": 3.0, "eval_bleu": 31.2642, "eval_gen_len": 18.1188, "eval_loss": 1.168749451637268, "eval_runtime": 1511.0096, "eval_samples_per_second": 15.028, "eval_steps_per_second": 0.94, "step": 34062 }, { "epoch": 3.04, "learning_rate": 3.4807116434736656e-05, "loss": 0.6673, "step": 34500 }, { "epoch": 3.08, "learning_rate": 3.4586929716399506e-05, "loss": 0.6346, "step": 35000 }, { "epoch": 3.13, "learning_rate": 3.4366742998062356e-05, "loss": 0.6203, "step": 35500 }, { "epoch": 3.17, "learning_rate": 3.4146556279725206e-05, "loss": 0.605, "step": 36000 }, { "epoch": 3.21, "learning_rate": 3.392636956138806e-05, "loss": 0.5996, "step": 36500 }, { "epoch": 3.26, "learning_rate": 3.3706182843050906e-05, "loss": 0.5942, "step": 37000 }, { "epoch": 3.3, "learning_rate": 3.3485996124713757e-05, "loss": 0.5911, "step": 37500 }, { "epoch": 3.35, "learning_rate": 3.3265809406376607e-05, "loss": 0.5741, "step": 38000 }, { "epoch": 3.39, "learning_rate": 3.304562268803946e-05, "loss": 0.5807, "step": 38500 }, { "epoch": 3.43, "learning_rate": 3.2825435969702313e-05, "loss": 0.5777, "step": 39000 }, { "epoch": 3.48, "learning_rate": 3.2605249251365164e-05, "loss": 0.5823, "step": 39500 }, { "epoch": 3.52, "learning_rate": 3.238506253302801e-05, "loss": 0.5698, "step": 40000 }, { "epoch": 3.57, "learning_rate": 3.216487581469086e-05, "loss": 0.5727, "step": 40500 }, { "epoch": 3.61, "learning_rate": 3.194468909635371e-05, "loss": 0.5648, "step": 41000 }, { "epoch": 3.66, "learning_rate": 3.172450237801656e-05, "loss": 0.5641, "step": 41500 }, { "epoch": 3.7, "learning_rate": 3.1504315659679414e-05, "loss": 0.559, "step": 42000 }, { "epoch": 3.74, "learning_rate": 3.1284128941342264e-05, "loss": 0.5487, "step": 42500 }, { "epoch": 3.79, "learning_rate": 3.106394222300511e-05, "loss": 0.5521, "step": 43000 }, { "epoch": 3.83, "learning_rate": 3.084375550466796e-05, "loss": 0.5509, "step": 43500 }, { "epoch": 3.88, "learning_rate": 3.062356878633081e-05, "loss": 0.5557, "step": 44000 }, { "epoch": 3.92, "learning_rate": 3.0403382067993657e-05, "loss": 0.551, "step": 44500 }, { "epoch": 3.96, "learning_rate": 3.018319534965651e-05, "loss": 0.5406, "step": 45000 }, { "epoch": 4.0, "eval_bleu": 30.9531, "eval_gen_len": 17.9958, "eval_loss": 1.2618861198425293, "eval_runtime": 1469.856, "eval_samples_per_second": 15.448, "eval_steps_per_second": 0.966, "step": 45416 }, { "epoch": 4.01, "learning_rate": 2.996300863131936e-05, "loss": 0.5371, "step": 45500 }, { "epoch": 4.05, "learning_rate": 2.974282191298221e-05, "loss": 0.4843, "step": 46000 }, { "epoch": 4.1, "learning_rate": 2.952263519464506e-05, "loss": 0.4779, "step": 46500 }, { "epoch": 4.14, "learning_rate": 2.9302448476307908e-05, "loss": 0.4551, "step": 47000 }, { "epoch": 4.18, "learning_rate": 2.9082261757970758e-05, "loss": 0.4512, "step": 47500 }, { "epoch": 4.23, "learning_rate": 2.886207503963361e-05, "loss": 0.4464, "step": 48000 }, { "epoch": 4.27, "learning_rate": 2.864188832129646e-05, "loss": 0.4317, "step": 48500 }, { "epoch": 4.32, "learning_rate": 2.842170160295931e-05, "loss": 0.4392, "step": 49000 }, { "epoch": 4.36, "learning_rate": 2.820151488462216e-05, "loss": 0.4232, "step": 49500 }, { "epoch": 4.4, "learning_rate": 2.7981328166285008e-05, "loss": 0.4341, "step": 50000 }, { "epoch": 4.45, "learning_rate": 2.7761141447947865e-05, "loss": 0.4245, "step": 50500 }, { "epoch": 4.49, "learning_rate": 2.754095472961071e-05, "loss": 0.4266, "step": 51000 }, { "epoch": 4.54, "learning_rate": 2.732076801127356e-05, "loss": 0.4199, "step": 51500 }, { "epoch": 4.58, "learning_rate": 2.710058129293641e-05, "loss": 0.4266, "step": 52000 }, { "epoch": 4.62, "learning_rate": 2.688039457459926e-05, "loss": 0.4191, "step": 52500 }, { "epoch": 4.67, "learning_rate": 2.6660207856262108e-05, "loss": 0.4155, "step": 53000 }, { "epoch": 4.71, "learning_rate": 2.6440021137924965e-05, "loss": 0.4058, "step": 53500 }, { "epoch": 4.76, "learning_rate": 2.6219834419587812e-05, "loss": 0.4079, "step": 54000 }, { "epoch": 4.8, "learning_rate": 2.5999647701250662e-05, "loss": 0.4041, "step": 54500 }, { "epoch": 4.84, "learning_rate": 2.5779460982913512e-05, "loss": 0.4099, "step": 55000 }, { "epoch": 4.89, "learning_rate": 2.5559274264576362e-05, "loss": 0.4107, "step": 55500 }, { "epoch": 4.93, "learning_rate": 2.533908754623921e-05, "loss": 0.4016, "step": 56000 }, { "epoch": 4.98, "learning_rate": 2.5118900827902065e-05, "loss": 0.4027, "step": 56500 }, { "epoch": 5.0, "eval_bleu": 30.7923, "eval_gen_len": 18.0582, "eval_loss": 1.3788937330245972, "eval_runtime": 1470.5701, "eval_samples_per_second": 15.441, "eval_steps_per_second": 0.966, "step": 56770 }, { "epoch": 5.02, "learning_rate": 2.4898714109564912e-05, "loss": 0.3814, "step": 57000 }, { "epoch": 5.06, "learning_rate": 2.4678527391227762e-05, "loss": 0.3509, "step": 57500 }, { "epoch": 5.11, "learning_rate": 2.4458340672890612e-05, "loss": 0.3365, "step": 58000 }, { "epoch": 5.15, "learning_rate": 2.4238153954553462e-05, "loss": 0.3269, "step": 58500 }, { "epoch": 5.2, "learning_rate": 2.4017967236216312e-05, "loss": 0.3289, "step": 59000 }, { "epoch": 5.24, "learning_rate": 2.3797780517879162e-05, "loss": 0.3147, "step": 59500 }, { "epoch": 5.28, "learning_rate": 2.3577593799542012e-05, "loss": 0.3133, "step": 60000 }, { "epoch": 5.33, "learning_rate": 2.3357407081204862e-05, "loss": 0.3149, "step": 60500 }, { "epoch": 5.37, "learning_rate": 2.3137220362867713e-05, "loss": 0.3028, "step": 61000 }, { "epoch": 5.42, "learning_rate": 2.2917033644530563e-05, "loss": 0.313, "step": 61500 }, { "epoch": 5.46, "learning_rate": 2.2696846926193413e-05, "loss": 0.3081, "step": 62000 }, { "epoch": 5.5, "learning_rate": 2.2476660207856263e-05, "loss": 0.3068, "step": 62500 }, { "epoch": 5.55, "learning_rate": 2.2256473489519113e-05, "loss": 0.3054, "step": 63000 }, { "epoch": 5.59, "learning_rate": 2.2036286771181963e-05, "loss": 0.3015, "step": 63500 }, { "epoch": 5.64, "learning_rate": 2.1816100052844813e-05, "loss": 0.3043, "step": 64000 }, { "epoch": 5.68, "learning_rate": 2.1595913334507663e-05, "loss": 0.3009, "step": 64500 }, { "epoch": 5.72, "learning_rate": 2.1375726616170513e-05, "loss": 0.2964, "step": 65000 }, { "epoch": 5.77, "learning_rate": 2.1155539897833363e-05, "loss": 0.2883, "step": 65500 }, { "epoch": 5.81, "learning_rate": 2.0935353179496213e-05, "loss": 0.2958, "step": 66000 }, { "epoch": 5.86, "learning_rate": 2.0715166461159063e-05, "loss": 0.2985, "step": 66500 }, { "epoch": 5.9, "learning_rate": 2.0494979742821913e-05, "loss": 0.2975, "step": 67000 }, { "epoch": 5.95, "learning_rate": 2.0274793024484763e-05, "loss": 0.288, "step": 67500 }, { "epoch": 5.99, "learning_rate": 2.0054606306147613e-05, "loss": 0.286, "step": 68000 }, { "epoch": 6.0, "eval_bleu": 30.9393, "eval_gen_len": 18.1183, "eval_loss": 1.4784362316131592, "eval_runtime": 1467.697, "eval_samples_per_second": 15.471, "eval_steps_per_second": 0.968, "step": 68124 }, { "epoch": 6.0, "step": 68124, "total_flos": 2.3622407943975076e+18, "train_loss": 0.7323888317612346, "train_runtime": 87093.7472, "train_samples_per_second": 20.858, "train_steps_per_second": 1.304 } ], "logging_steps": 500, "max_steps": 113540, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.3622407943975076e+18, "trial_name": null, "trial_params": null }