|
{ |
|
"best_metric": 1.1251049041748047, |
|
"best_model_checkpoint": "./mbartLarge_koja_mid2_run1/checkpoint-22708", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 68124, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.977981328166285e-05, |
|
"loss": 1.9241, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.95596265633257e-05, |
|
"loss": 1.6494, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.933943984498855e-05, |
|
"loss": 1.5434, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.91192531266514e-05, |
|
"loss": 1.4836, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.889906640831425e-05, |
|
"loss": 1.4499, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.86788796899771e-05, |
|
"loss": 1.3934, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.845869297163995e-05, |
|
"loss": 1.3857, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.82385062533028e-05, |
|
"loss": 1.3396, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.801831953496565e-05, |
|
"loss": 1.3322, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.77981328166285e-05, |
|
"loss": 1.3106, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.757794609829135e-05, |
|
"loss": 1.2924, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.73577593799542e-05, |
|
"loss": 1.2834, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.713757266161705e-05, |
|
"loss": 1.2791, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.69173859432799e-05, |
|
"loss": 1.255, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.669719922494275e-05, |
|
"loss": 1.2573, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.64770125066056e-05, |
|
"loss": 1.2382, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.625682578826845e-05, |
|
"loss": 1.223, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.6036639069931303e-05, |
|
"loss": 1.2145, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.5816452351594153e-05, |
|
"loss": 1.219, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.5596265633257004e-05, |
|
"loss": 1.2125, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.5376078914919854e-05, |
|
"loss": 1.1996, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.5155892196582704e-05, |
|
"loss": 1.1823, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 29.4501, |
|
"eval_gen_len": 18.8118, |
|
"eval_loss": 1.1695398092269897, |
|
"eval_runtime": 1716.1373, |
|
"eval_samples_per_second": 13.231, |
|
"eval_steps_per_second": 0.827, |
|
"step": 11354 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.4935705478245554e-05, |
|
"loss": 1.1596, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.4715518759908404e-05, |
|
"loss": 1.0777, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.4495332041571254e-05, |
|
"loss": 1.0658, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.427514532323411e-05, |
|
"loss": 1.0296, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.4054958604896954e-05, |
|
"loss": 1.0269, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.3834771886559804e-05, |
|
"loss": 1.0192, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.3614585168222654e-05, |
|
"loss": 0.9946, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.3394398449885504e-05, |
|
"loss": 0.9996, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.3174211731548354e-05, |
|
"loss": 0.975, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.295402501321121e-05, |
|
"loss": 0.9874, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.2733838294874054e-05, |
|
"loss": 0.9683, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.2513651576536904e-05, |
|
"loss": 0.9675, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.2293464858199754e-05, |
|
"loss": 0.969, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.2073278139862604e-05, |
|
"loss": 0.9548, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.1853091421525454e-05, |
|
"loss": 0.9563, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.163290470318831e-05, |
|
"loss": 0.9536, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1412717984851155e-05, |
|
"loss": 0.941, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1192531266514005e-05, |
|
"loss": 0.9285, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.0972344548176855e-05, |
|
"loss": 0.9303, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.0752157829839705e-05, |
|
"loss": 0.9428, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.0531971111502555e-05, |
|
"loss": 0.9359, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.031178439316541e-05, |
|
"loss": 0.9302, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.0091597674828255e-05, |
|
"loss": 0.9207, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 30.842, |
|
"eval_gen_len": 18.0892, |
|
"eval_loss": 1.1251049041748047, |
|
"eval_runtime": 1535.7063, |
|
"eval_samples_per_second": 14.786, |
|
"eval_steps_per_second": 0.925, |
|
"step": 22708 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.9871410956491105e-05, |
|
"loss": 0.8788, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.9651224238153955e-05, |
|
"loss": 0.8367, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.9431037519816805e-05, |
|
"loss": 0.8196, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.921085080147966e-05, |
|
"loss": 0.7967, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.8990664083142505e-05, |
|
"loss": 0.7956, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.8770477364805355e-05, |
|
"loss": 0.7843, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.8550290646468205e-05, |
|
"loss": 0.7767, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.8330103928131055e-05, |
|
"loss": 0.7668, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.8109917209793905e-05, |
|
"loss": 0.754, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.788973049145676e-05, |
|
"loss": 0.7671, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7669543773119605e-05, |
|
"loss": 0.7569, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.7449357054782456e-05, |
|
"loss": 0.7553, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.7229170336445306e-05, |
|
"loss": 0.7526, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.7008983618108156e-05, |
|
"loss": 0.7427, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.6788796899771006e-05, |
|
"loss": 0.7407, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.656861018143386e-05, |
|
"loss": 0.7418, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 3.6348423463096706e-05, |
|
"loss": 0.7301, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.6128236744759556e-05, |
|
"loss": 0.7218, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.5908050026422406e-05, |
|
"loss": 0.7232, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.5687863308085256e-05, |
|
"loss": 0.7375, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.5467676589748106e-05, |
|
"loss": 0.7261, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.524748987141096e-05, |
|
"loss": 0.7197, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.5027303153073806e-05, |
|
"loss": 0.7127, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 31.2642, |
|
"eval_gen_len": 18.1188, |
|
"eval_loss": 1.168749451637268, |
|
"eval_runtime": 1511.0096, |
|
"eval_samples_per_second": 15.028, |
|
"eval_steps_per_second": 0.94, |
|
"step": 34062 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.4807116434736656e-05, |
|
"loss": 0.6673, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.4586929716399506e-05, |
|
"loss": 0.6346, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.4366742998062356e-05, |
|
"loss": 0.6203, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.4146556279725206e-05, |
|
"loss": 0.605, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.392636956138806e-05, |
|
"loss": 0.5996, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.3706182843050906e-05, |
|
"loss": 0.5942, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.3485996124713757e-05, |
|
"loss": 0.5911, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.3265809406376607e-05, |
|
"loss": 0.5741, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.304562268803946e-05, |
|
"loss": 0.5807, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.2825435969702313e-05, |
|
"loss": 0.5777, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.2605249251365164e-05, |
|
"loss": 0.5823, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 3.238506253302801e-05, |
|
"loss": 0.5698, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.216487581469086e-05, |
|
"loss": 0.5727, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.194468909635371e-05, |
|
"loss": 0.5648, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 3.172450237801656e-05, |
|
"loss": 0.5641, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.1504315659679414e-05, |
|
"loss": 0.559, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.1284128941342264e-05, |
|
"loss": 0.5487, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 3.106394222300511e-05, |
|
"loss": 0.5521, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.084375550466796e-05, |
|
"loss": 0.5509, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 3.062356878633081e-05, |
|
"loss": 0.5557, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.0403382067993657e-05, |
|
"loss": 0.551, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 3.018319534965651e-05, |
|
"loss": 0.5406, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 30.9531, |
|
"eval_gen_len": 17.9958, |
|
"eval_loss": 1.2618861198425293, |
|
"eval_runtime": 1469.856, |
|
"eval_samples_per_second": 15.448, |
|
"eval_steps_per_second": 0.966, |
|
"step": 45416 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.996300863131936e-05, |
|
"loss": 0.5371, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.974282191298221e-05, |
|
"loss": 0.4843, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.952263519464506e-05, |
|
"loss": 0.4779, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.9302448476307908e-05, |
|
"loss": 0.4551, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.9082261757970758e-05, |
|
"loss": 0.4512, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 2.886207503963361e-05, |
|
"loss": 0.4464, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.864188832129646e-05, |
|
"loss": 0.4317, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.842170160295931e-05, |
|
"loss": 0.4392, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.820151488462216e-05, |
|
"loss": 0.4232, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.7981328166285008e-05, |
|
"loss": 0.4341, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.7761141447947865e-05, |
|
"loss": 0.4245, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.754095472961071e-05, |
|
"loss": 0.4266, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 2.732076801127356e-05, |
|
"loss": 0.4199, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2.710058129293641e-05, |
|
"loss": 0.4266, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.688039457459926e-05, |
|
"loss": 0.4191, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.6660207856262108e-05, |
|
"loss": 0.4155, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.6440021137924965e-05, |
|
"loss": 0.4058, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.6219834419587812e-05, |
|
"loss": 0.4079, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.5999647701250662e-05, |
|
"loss": 0.4041, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.5779460982913512e-05, |
|
"loss": 0.4099, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.5559274264576362e-05, |
|
"loss": 0.4107, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 2.533908754623921e-05, |
|
"loss": 0.4016, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.5118900827902065e-05, |
|
"loss": 0.4027, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 30.7923, |
|
"eval_gen_len": 18.0582, |
|
"eval_loss": 1.3788937330245972, |
|
"eval_runtime": 1470.5701, |
|
"eval_samples_per_second": 15.441, |
|
"eval_steps_per_second": 0.966, |
|
"step": 56770 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.4898714109564912e-05, |
|
"loss": 0.3814, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.4678527391227762e-05, |
|
"loss": 0.3509, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.4458340672890612e-05, |
|
"loss": 0.3365, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.4238153954553462e-05, |
|
"loss": 0.3269, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.4017967236216312e-05, |
|
"loss": 0.3289, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.3797780517879162e-05, |
|
"loss": 0.3147, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 2.3577593799542012e-05, |
|
"loss": 0.3133, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 2.3357407081204862e-05, |
|
"loss": 0.3149, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 2.3137220362867713e-05, |
|
"loss": 0.3028, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 2.2917033644530563e-05, |
|
"loss": 0.313, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 2.2696846926193413e-05, |
|
"loss": 0.3081, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 2.2476660207856263e-05, |
|
"loss": 0.3068, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 2.2256473489519113e-05, |
|
"loss": 0.3054, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 2.2036286771181963e-05, |
|
"loss": 0.3015, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 2.1816100052844813e-05, |
|
"loss": 0.3043, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 2.1595913334507663e-05, |
|
"loss": 0.3009, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 2.1375726616170513e-05, |
|
"loss": 0.2964, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 2.1155539897833363e-05, |
|
"loss": 0.2883, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 2.0935353179496213e-05, |
|
"loss": 0.2958, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 2.0715166461159063e-05, |
|
"loss": 0.2985, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 2.0494979742821913e-05, |
|
"loss": 0.2975, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 2.0274793024484763e-05, |
|
"loss": 0.288, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 2.0054606306147613e-05, |
|
"loss": 0.286, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 30.9393, |
|
"eval_gen_len": 18.1183, |
|
"eval_loss": 1.4784362316131592, |
|
"eval_runtime": 1467.697, |
|
"eval_samples_per_second": 15.471, |
|
"eval_steps_per_second": 0.968, |
|
"step": 68124 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 113540, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2.3622407943975076e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|