{
  "best_metric": 0.6102247834205627,
  "best_model_checkpoint": "./luke-marian-training-1/checkpoint-123440",
  "epoch": 10.0,
  "global_step": 123440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.04, "learning_rate": 8.052495139338951e-07, "loss": 0.7856, "step": 500},
    {"epoch": 0.08, "learning_rate": 1.6153596889176928e-06, "loss": 0.748, "step": 1000},
    {"epoch": 0.12, "learning_rate": 2.4254698639014908e-06, "loss": 0.7809, "step": 1500},
    {"epoch": 0.16, "learning_rate": 3.2355800388852886e-06, "loss": 0.7923, "step": 2000},
    {"epoch": 0.2, "learning_rate": 4.044069993519119e-06, "loss": 0.7799, "step": 2500},
    {"epoch": 0.24, "learning_rate": 4.854180168502917e-06, "loss": 0.8085, "step": 3000},
    {"epoch": 0.28, "learning_rate": 5.664290343486715e-06, "loss": 0.7759, "step": 3500},
    {"epoch": 0.32, "learning_rate": 6.474400518470513e-06, "loss": 0.7833, "step": 4000},
    {"epoch": 0.36, "learning_rate": 7.2828904731043424e-06, "loss": 0.7804, "step": 4500},
    {"epoch": 0.41, "learning_rate": 8.09300064808814e-06, "loss": 0.8169, "step": 5000},
    {"epoch": 0.45, "learning_rate": 8.903110823071939e-06, "loss": 0.8058, "step": 5500},
    {"epoch": 0.49, "learning_rate": 9.713220998055737e-06, "loss": 0.8265, "step": 6000},
    {"epoch": 0.53, "learning_rate": 9.972541528805813e-06, "loss": 0.7904, "step": 6500},
    {"epoch": 0.57, "learning_rate": 9.929904151175087e-06, "loss": 0.7829, "step": 7000},
    {"epoch": 0.61, "learning_rate": 9.88726677354436e-06, "loss": 0.8126, "step": 7500},
    {"epoch": 0.65, "learning_rate": 9.844629395913635e-06, "loss": 0.8133, "step": 8000},
    {"epoch": 0.69, "learning_rate": 9.802077293038171e-06, "loss": 0.7928, "step": 8500},
    {"epoch": 0.73, "learning_rate": 9.759525190162705e-06, "loss": 0.8564, "step": 9000},
    {"epoch": 0.77, "learning_rate": 9.716887812531979e-06, "loss": 0.7624, "step": 9500},
    {"epoch": 0.81, "learning_rate": 9.674250434901252e-06, "loss": 0.7662, "step": 10000},
    {"epoch": 0.85, "learning_rate": 9.631613057270527e-06, "loss": 0.8363, "step": 10500},
    {"epoch": 0.89, "learning_rate": 9.5889756796398e-06, "loss": 0.7793, "step": 11000},
    {"epoch": 0.93, "learning_rate": 9.546338302009073e-06, "loss": 0.7561, "step": 11500},
    {"epoch": 0.97, "learning_rate": 9.50378619913361e-06, "loss": 0.774, "step": 12000},
    {"epoch": 1.0, "eval_BLEU": 32.0811169920738, "eval_BLEU-Bigram-Precision": 37.1092045950269, "eval_BLEU-Trigram-Precision": 26.487573476332887, "eval_BLEU-Unigram-Precision": 56.60325589903054, "eval_ROUGE-2": 25.923225512351976, "eval_ROUGE-L": 40.443599801980355, "eval_Sacre-Bigram-Precision": 34.86367988711385, "eval_Sacre-Trigram-Precision": 26.20122541115769, "eval_Sacre-Unigram-Precision": 54.42125237191651, "eval_SacreBLEU": 30.767354925943692, "eval_loss": 0.9131098985671997, "eval_runtime": 174.9979, "eval_samples_per_second": 7.069, "eval_steps_per_second": 7.069, "step": 12344},
    {"epoch": 1.01, "learning_rate": 9.461148821502884e-06, "loss": 0.7846, "step": 12500},
    {"epoch": 1.05, "learning_rate": 9.418511443872157e-06, "loss": 0.7368, "step": 13000},
    {"epoch": 1.09, "learning_rate": 9.37587406624143e-06, "loss": 0.7279, "step": 13500},
    {"epoch": 1.13, "learning_rate": 9.333236688610704e-06, "loss": 0.7253, "step": 14000},
    {"epoch": 1.17, "learning_rate": 9.290599310979979e-06, "loss": 0.7259, "step": 14500},
    {"epoch": 1.22, "learning_rate": 9.247961933349252e-06, "loss": 0.7432, "step": 15000},
    {"epoch": 1.26, "learning_rate": 9.205324555718525e-06, "loss": 0.7437, "step": 15500},
    {"epoch": 1.3, "learning_rate": 9.1626871780878e-06, "loss": 0.6641, "step": 16000},
    {"epoch": 1.34, "learning_rate": 9.120135075212335e-06, "loss": 0.7239, "step": 16500},
    {"epoch": 1.38, "learning_rate": 9.077497697581608e-06, "loss": 0.7007, "step": 17000},
    {"epoch": 1.42, "learning_rate": 9.034860319950883e-06, "loss": 0.7107, "step": 17500},
    {"epoch": 1.46, "learning_rate": 8.992222942320156e-06, "loss": 0.7033, "step": 18000},
    {"epoch": 1.5, "learning_rate": 8.949670839444692e-06, "loss": 0.6932, "step": 18500},
    {"epoch": 1.54, "learning_rate": 8.907118736569226e-06, "loss": 0.7351, "step": 19000},
    {"epoch": 1.58, "learning_rate": 8.8644813589385e-06, "loss": 0.7511, "step": 19500},
    {"epoch": 1.62, "learning_rate": 8.821843981307775e-06, "loss": 0.6994, "step": 20000},
    {"epoch": 1.66, "learning_rate": 8.779206603677048e-06, "loss": 0.6792, "step": 20500},
    {"epoch": 1.7, "learning_rate": 8.736569226046321e-06, "loss": 0.6701, "step": 21000},
    {"epoch": 1.74, "learning_rate": 8.693931848415596e-06, "loss": 0.6885, "step": 21500},
    {"epoch": 1.78, "learning_rate": 8.65129447078487e-06, "loss": 0.6796, "step": 22000},
    {"epoch": 1.82, "learning_rate": 8.608657093154144e-06, "loss": 0.6812, "step": 22500},
    {"epoch": 1.86, "learning_rate": 8.566104990278679e-06, "loss": 0.704, "step": 23000},
    {"epoch": 1.9, "learning_rate": 8.523467612647952e-06, "loss": 0.6443, "step": 23500},
    {"epoch": 1.94, "learning_rate": 8.480830235017227e-06, "loss": 0.6513, "step": 24000},
    {"epoch": 1.98, "learning_rate": 8.4381928573865e-06, "loss": 0.6532, "step": 24500},
    {"epoch": 2.0, "eval_BLEU": 34.14564345469897, "eval_BLEU-Bigram-Precision": 42.337954289996254, "eval_BLEU-Trigram-Precision": 32.169876203576344, "eval_BLEU-Unigram-Precision": 61.27510040160643, "eval_ROUGE-2": 28.971835230430752, "eval_ROUGE-L": 43.230417531342944, "eval_Sacre-Bigram-Precision": 40.19043401240035, "eval_Sacre-Trigram-Precision": 31.907054139181078, "eval_Sacre-Unigram-Precision": 59.04875395057251, "eval_SacreBLEU": 33.00279853845508, "eval_loss": 0.8338403701782227, "eval_runtime": 167.0515, "eval_samples_per_second": 7.405, "eval_steps_per_second": 7.405, "step": 24688},
    {"epoch": 2.03, "learning_rate": 8.395555479755773e-06, "loss": 0.6209, "step": 25000},
    {"epoch": 2.07, "learning_rate": 8.35300337688031e-06, "loss": 0.6246, "step": 25500},
    {"epoch": 2.11, "learning_rate": 8.310365999249583e-06, "loss": 0.6061, "step": 26000},
    {"epoch": 2.15, "learning_rate": 8.267728621618858e-06, "loss": 0.6347, "step": 26500},
    {"epoch": 2.19, "learning_rate": 8.22509124398813e-06, "loss": 0.609, "step": 27000},
    {"epoch": 2.23, "learning_rate": 8.182539141112665e-06, "loss": 0.6059, "step": 27500},
    {"epoch": 2.27, "learning_rate": 8.13990176348194e-06, "loss": 0.6301, "step": 28000},
    {"epoch": 2.31, "learning_rate": 8.097264385851213e-06, "loss": 0.5859, "step": 28500},
    {"epoch": 2.35, "learning_rate": 8.054627008220487e-06, "loss": 0.6133, "step": 29000},
    {"epoch": 2.39, "learning_rate": 8.012074905345023e-06, "loss": 0.5813, "step": 29500},
    {"epoch": 2.43, "learning_rate": 7.969437527714296e-06, "loss": 0.6049, "step": 30000},
    {"epoch": 2.47, "learning_rate": 7.92680015008357e-06, "loss": 0.5982, "step": 30500},
    {"epoch": 2.51, "learning_rate": 7.884162772452844e-06, "loss": 0.5807, "step": 31000},
    {"epoch": 2.55, "learning_rate": 7.841525394822117e-06, "loss": 0.5809, "step": 31500},
    {"epoch": 2.59, "learning_rate": 7.798973291946653e-06, "loss": 0.5984, "step": 32000},
    {"epoch": 2.63, "learning_rate": 7.756335914315927e-06, "loss": 0.6308, "step": 32500},
    {"epoch": 2.67, "learning_rate": 7.7136985366852e-06, "loss": 0.5964, "step": 33000},
    {"epoch": 2.71, "learning_rate": 7.671061159054473e-06, "loss": 0.5783, "step": 33500},
    {"epoch": 2.75, "learning_rate": 7.62850905617901e-06, "loss": 0.5914, "step": 34000},
    {"epoch": 2.79, "learning_rate": 7.585871678548283e-06, "loss": 0.5716, "step": 34500},
    {"epoch": 2.84, "learning_rate": 7.543319575672818e-06, "loss": 0.613, "step": 35000},
    {"epoch": 2.88, "learning_rate": 7.500682198042092e-06, "loss": 0.561, "step": 35500},
    {"epoch": 2.92, "learning_rate": 7.458044820411367e-06, "loss": 0.5731, "step": 36000},
    {"epoch": 2.96, "learning_rate": 7.41540744278064e-06, "loss": 0.565, "step": 36500},
    {"epoch": 3.0, "learning_rate": 7.372855339905175e-06, "loss": 0.6118, "step": 37000},
    {"epoch": 3.0, "eval_BLEU": 39.5916929052258, "eval_BLEU-Bigram-Precision": 44.30874450415242, "eval_BLEU-Trigram-Precision": 34.66957832891385, "eval_BLEU-Unigram-Precision": 61.68977749113189, "eval_ROUGE-2": 33.461963127391705, "eval_ROUGE-L": 46.65653806735692, "eval_Sacre-Bigram-Precision": 42.369437106279214, "eval_Sacre-Trigram-Precision": 34.54220198406245, "eval_Sacre-Unigram-Precision": 59.68165957650208, "eval_SacreBLEU": 38.27100461755832, "eval_loss": 0.7759082317352295, "eval_runtime": 173.3948, "eval_samples_per_second": 7.134, "eval_steps_per_second": 7.134, "step": 37032},
    {"epoch": 3.04, "learning_rate": 7.33030323702971e-06, "loss": 0.4871, "step": 37500},
    {"epoch": 3.08, "learning_rate": 7.287665859398984e-06, "loss": 0.4991, "step": 38000},
    {"epoch": 3.12, "learning_rate": 7.245028481768258e-06, "loss": 0.5436, "step": 38500},
    {"epoch": 3.16, "learning_rate": 7.202391104137532e-06, "loss": 0.5165, "step": 39000},
    {"epoch": 3.2, "learning_rate": 7.159753726506805e-06, "loss": 0.516, "step": 39500},
    {"epoch": 3.24, "learning_rate": 7.11711634887608e-06, "loss": 0.5173, "step": 40000},
    {"epoch": 3.28, "learning_rate": 7.074478971245353e-06, "loss": 0.5239, "step": 40500},
    {"epoch": 3.32, "learning_rate": 7.031841593614626e-06, "loss": 0.5112, "step": 41000},
    {"epoch": 3.36, "learning_rate": 6.9892042159839e-06, "loss": 0.516, "step": 41500},
    {"epoch": 3.4, "learning_rate": 6.9465668383531745e-06, "loss": 0.5384, "step": 42000},
    {"epoch": 3.44, "learning_rate": 6.903929460722449e-06, "loss": 0.5194, "step": 42500},
    {"epoch": 3.48, "learning_rate": 6.861292083091722e-06, "loss": 0.5006, "step": 43000},
    {"epoch": 3.52, "learning_rate": 6.818739980216257e-06, "loss": 0.5362, "step": 43500},
    {"epoch": 3.56, "learning_rate": 6.776102602585531e-06, "loss": 0.5505, "step": 44000},
    {"epoch": 3.6, "learning_rate": 6.733465224954805e-06, "loss": 0.5247, "step": 44500},
    {"epoch": 3.65, "learning_rate": 6.6908278473240785e-06, "loss": 0.5104, "step": 45000},
    {"epoch": 3.69, "learning_rate": 6.648275744448614e-06, "loss": 0.516, "step": 45500},
    {"epoch": 3.73, "learning_rate": 6.605638366817888e-06, "loss": 0.5097, "step": 46000},
    {"epoch": 3.77, "learning_rate": 6.563000989187162e-06, "loss": 0.5147, "step": 46500},
    {"epoch": 3.81, "learning_rate": 6.520363611556435e-06, "loss": 0.4988, "step": 47000},
    {"epoch": 3.85, "learning_rate": 6.47781150868097e-06, "loss": 0.5143, "step": 47500},
    {"epoch": 3.89, "learning_rate": 6.435174131050244e-06, "loss": 0.5146, "step": 48000},
    {"epoch": 3.93, "learning_rate": 6.3925367534195185e-06, "loss": 0.4949, "step": 48500},
    {"epoch": 3.97, "learning_rate": 6.349899375788792e-06, "loss": 0.5327, "step": 49000},
    {"epoch": 4.0, "eval_BLEU": 41.583977153720554, "eval_BLEU-Bigram-Precision": 47.79210232463709, "eval_BLEU-Trigram-Precision": 38.44570809639859, "eval_BLEU-Unigram-Precision": 64.70223028797936, "eval_ROUGE-2": 35.92233849645701, "eval_ROUGE-L": 48.89087193511837, "eval_Sacre-Bigram-Precision": 46.017043349388665, "eval_Sacre-Trigram-Precision": 38.29859537834164, "eval_Sacre-Unigram-Precision": 62.97069051167412, "eval_SacreBLEU": 40.202754965999326, "eval_loss": 0.734529972076416, "eval_runtime": 168.3041, "eval_samples_per_second": 7.35, "eval_steps_per_second": 7.35, "step": 49376},
    {"epoch": 4.01, "learning_rate": 6.307347272913327e-06, "loss": 0.515, "step": 49500},
    {"epoch": 4.05, "learning_rate": 6.264709895282601e-06, "loss": 0.4603, "step": 50000},
    {"epoch": 4.09, "learning_rate": 6.222072517651875e-06, "loss": 0.4565, "step": 50500},
    {"epoch": 4.13, "learning_rate": 6.179435140021148e-06, "loss": 0.4557, "step": 51000},
    {"epoch": 4.17, "learning_rate": 6.136797762390423e-06, "loss": 0.4586, "step": 51500},
    {"epoch": 4.21, "learning_rate": 6.094245659514958e-06, "loss": 0.4295, "step": 52000},
    {"epoch": 4.25, "learning_rate": 6.051608281884232e-06, "loss": 0.4774, "step": 52500},
    {"epoch": 4.29, "learning_rate": 6.009056179008766e-06, "loss": 0.4555, "step": 53000},
    {"epoch": 4.33, "learning_rate": 5.966418801378041e-06, "loss": 0.4448, "step": 53500},
    {"epoch": 4.37, "learning_rate": 5.923781423747314e-06, "loss": 0.4481, "step": 54000},
    {"epoch": 4.42, "learning_rate": 5.881144046116588e-06, "loss": 0.4288, "step": 54500},
    {"epoch": 4.46, "learning_rate": 5.838506668485862e-06, "loss": 0.4459, "step": 55000},
    {"epoch": 4.5, "learning_rate": 5.7958692908551366e-06, "loss": 0.4616, "step": 55500},
    {"epoch": 4.54, "learning_rate": 5.75323191322441e-06, "loss": 0.4775, "step": 56000},
    {"epoch": 4.58, "learning_rate": 5.710594535593683e-06, "loss": 0.4414, "step": 56500},
    {"epoch": 4.62, "learning_rate": 5.668042432718218e-06, "loss": 0.483, "step": 57000},
    {"epoch": 4.66, "learning_rate": 5.625405055087493e-06, "loss": 0.4671, "step": 57500},
    {"epoch": 4.7, "learning_rate": 5.5827676774567664e-06, "loss": 0.4696, "step": 58000},
    {"epoch": 4.74, "learning_rate": 5.54013029982604e-06, "loss": 0.4628, "step": 58500},
    {"epoch": 4.78, "learning_rate": 5.497578196950575e-06, "loss": 0.4591, "step": 59000},
    {"epoch": 4.82, "learning_rate": 5.45494081931985e-06, "loss": 0.4362, "step": 59500},
    {"epoch": 4.86, "learning_rate": 5.412303441689123e-06, "loss": 0.4767, "step": 60000},
    {"epoch": 4.9, "learning_rate": 5.369666064058396e-06, "loss": 0.4642, "step": 60500},
    {"epoch": 4.94, "learning_rate": 5.3271139611829315e-06, "loss": 0.4348, "step": 61000},
    {"epoch": 4.98, "learning_rate": 5.2844765835522064e-06, "loss": 0.4365, "step": 61500},
    {"epoch": 5.0, "eval_BLEU": 43.53907049710156, "eval_BLEU-Bigram-Precision": 49.90573248407643, "eval_BLEU-Trigram-Precision": 40.825538394605175, "eval_BLEU-Unigram-Precision": 66.37906241012367, "eval_ROUGE-2": 38.22782051808487, "eval_ROUGE-L": 51.36307879612767, "eval_Sacre-Bigram-Precision": 48.25423818887177, "eval_Sacre-Trigram-Precision": 40.784982935153586, "eval_Sacre-Unigram-Precision": 64.79006681958711, "eval_SacreBLEU": 42.18614400441328, "eval_loss": 0.7037733197212219, "eval_runtime": 168.6409, "eval_samples_per_second": 7.335, "eval_steps_per_second": 7.335, "step": 61720},
    {"epoch": 5.02, "learning_rate": 5.241924480676741e-06, "loss": 0.4345, "step": 62000},
    {"epoch": 5.06, "learning_rate": 5.199372377801276e-06, "loss": 0.4124, "step": 62500},
    {"epoch": 5.1, "learning_rate": 5.156735000170549e-06, "loss": 0.3992, "step": 63000},
    {"epoch": 5.14, "learning_rate": 5.114097622539824e-06, "loss": 0.3928, "step": 63500},
    {"epoch": 5.18, "learning_rate": 5.0714602449090974e-06, "loss": 0.4428, "step": 64000},
    {"epoch": 5.23, "learning_rate": 5.0288228672783715e-06, "loss": 0.4003, "step": 64500},
    {"epoch": 5.27, "learning_rate": 4.986185489647646e-06, "loss": 0.4077, "step": 65000},
    {"epoch": 5.31, "learning_rate": 4.943548112016919e-06, "loss": 0.4076, "step": 65500},
    {"epoch": 5.35, "learning_rate": 4.900910734386193e-06, "loss": 0.4138, "step": 66000},
    {"epoch": 5.39, "learning_rate": 4.858273356755466e-06, "loss": 0.3901, "step": 66500},
    {"epoch": 5.43, "learning_rate": 4.81563597912474e-06, "loss": 0.4412, "step": 67000},
    {"epoch": 5.47, "learning_rate": 4.7729986014940135e-06, "loss": 0.4128, "step": 67500},
    {"epoch": 5.51, "learning_rate": 4.730361223863288e-06, "loss": 0.4382, "step": 68000},
    {"epoch": 5.55, "learning_rate": 4.687809120987823e-06, "loss": 0.3855, "step": 68500},
    {"epoch": 5.59, "learning_rate": 4.645171743357097e-06, "loss": 0.4116, "step": 69000},
    {"epoch": 5.63, "learning_rate": 4.60253436572637e-06, "loss": 0.3977, "step": 69500},
    {"epoch": 5.67, "learning_rate": 4.559896988095644e-06, "loss": 0.4232, "step": 70000},
    {"epoch": 5.71, "learning_rate": 4.517259610464918e-06, "loss": 0.4194, "step": 70500},
    {"epoch": 5.75, "learning_rate": 4.4746222328341924e-06, "loss": 0.389, "step": 71000},
    {"epoch": 5.79, "learning_rate": 4.4319848552034665e-06, "loss": 0.4051, "step": 71500},
    {"epoch": 5.83, "learning_rate": 4.38934747757274e-06, "loss": 0.4337, "step": 72000},
    {"epoch": 5.87, "learning_rate": 4.346795374697275e-06, "loss": 0.4143, "step": 72500},
    {"epoch": 5.91, "learning_rate": 4.304157997066549e-06, "loss": 0.4146, "step": 73000},
    {"epoch": 5.95, "learning_rate": 4.261520619435823e-06, "loss": 0.3878, "step": 73500},
    {"epoch": 5.99, "learning_rate": 4.218883241805096e-06, "loss": 0.3955, "step": 74000},
    {"epoch": 6.0, "eval_BLEU": 46.18598569567297, "eval_BLEU-Bigram-Precision": 51.835734293717486, "eval_BLEU-Trigram-Precision": 42.84724073580379, "eval_BLEU-Unigram-Precision": 67.89297658862876, "eval_ROUGE-2": 40.77288879803008, "eval_ROUGE-L": 53.47443573476642, "eval_Sacre-Bigram-Precision": 50.16359387172163, "eval_Sacre-Trigram-Precision": 42.76279276279276, "eval_Sacre-Unigram-Precision": 66.28928362287722, "eval_SacreBLEU": 44.9656942267261, "eval_loss": 0.6645232439041138, "eval_runtime": 169.8377, "eval_samples_per_second": 7.283, "eval_steps_per_second": 7.283, "step": 74064},
    {"epoch": 6.04, "learning_rate": 4.176331138929632e-06, "loss": 0.3766, "step": 74500},
    {"epoch": 6.08, "learning_rate": 4.133693761298906e-06, "loss": 0.3556, "step": 75000},
    {"epoch": 6.12, "learning_rate": 4.09105638366818e-06, "loss": 0.3511, "step": 75500},
    {"epoch": 6.16, "learning_rate": 4.048504280792714e-06, "loss": 0.4021, "step": 76000},
    {"epoch": 6.2, "learning_rate": 4.005952177917249e-06, "loss": 0.3931, "step": 76500},
    {"epoch": 6.24, "learning_rate": 3.9633148002865234e-06, "loss": 0.3704, "step": 77000},
    {"epoch": 6.28, "learning_rate": 3.9206774226557975e-06, "loss": 0.3515, "step": 77500},
    {"epoch": 6.32, "learning_rate": 3.878040045025071e-06, "loss": 0.3907, "step": 78000},
    {"epoch": 6.36, "learning_rate": 3.835402667394345e-06, "loss": 0.3643, "step": 78500},
    {"epoch": 6.4, "learning_rate": 3.7927652897636185e-06, "loss": 0.3708, "step": 79000},
    {"epoch": 6.44, "learning_rate": 3.7501279121328926e-06, "loss": 0.3697, "step": 79500},
    {"epoch": 6.48, "learning_rate": 3.7074905345021663e-06, "loss": 0.3486, "step": 80000},
    {"epoch": 6.52, "learning_rate": 3.6648531568714404e-06, "loss": 0.3552, "step": 80500},
    {"epoch": 6.56, "learning_rate": 3.6222157792407136e-06, "loss": 0.3812, "step": 81000},
    {"epoch": 6.6, "learning_rate": 3.5795784016099877e-06, "loss": 0.3622, "step": 81500},
    {"epoch": 6.64, "learning_rate": 3.5369410239792614e-06, "loss": 0.391, "step": 82000},
    {"epoch": 6.68, "learning_rate": 3.494388921103797e-06, "loss": 0.4055, "step": 82500},
    {"epoch": 6.72, "learning_rate": 3.4517515434730702e-06, "loss": 0.3967, "step": 83000},
    {"epoch": 6.76, "learning_rate": 3.4091141658423443e-06, "loss": 0.3949, "step": 83500},
    {"epoch": 6.8, "learning_rate": 3.3664767882116184e-06, "loss": 0.3898, "step": 84000},
    {"epoch": 6.85, "learning_rate": 3.3239246853361536e-06, "loss": 0.3997, "step": 84500},
    {"epoch": 6.89, "learning_rate": 3.281287307705427e-06, "loss": 0.4075, "step": 85000},
    {"epoch": 6.93, "learning_rate": 3.238649930074701e-06, "loss": 0.3704, "step": 85500},
    {"epoch": 6.97, "learning_rate": 3.196012552443975e-06, "loss": 0.3809, "step": 86000},
    {"epoch": 7.0, "eval_BLEU": 48.53041153339425, "eval_BLEU-Bigram-Precision": 52.751822317890515, "eval_BLEU-Trigram-Precision": 44.54280358258696, "eval_BLEU-Unigram-Precision": 67.82913552910786, "eval_ROUGE-2": 42.93136082972705, "eval_ROUGE-L": 54.935588774300534, "eval_Sacre-Bigram-Precision": 51.321117635148816, "eval_Sacre-Trigram-Precision": 44.494843134078515, "eval_Sacre-Unigram-Precision": 66.2363645024532, "eval_SacreBLEU": 47.488510994186036, "eval_loss": 0.6420064568519592, "eval_runtime": 175.4955, "eval_samples_per_second": 7.049, "eval_steps_per_second": 7.049, "step": 86408},
    {"epoch": 7.01, "learning_rate": 3.1534604495685103e-06, "loss": 0.3682, "step": 86500},
    {"epoch": 7.05, "learning_rate": 3.110908346693045e-06, "loss": 0.3759, "step": 87000},
    {"epoch": 7.09, "learning_rate": 3.068270969062319e-06, "loss": 0.3607, "step": 87500},
    {"epoch": 7.13, "learning_rate": 3.025633591431593e-06, "loss": 0.3478, "step": 88000},
    {"epoch": 7.17, "learning_rate": 2.982996213800867e-06, "loss": 0.3474, "step": 88500},
    {"epoch": 7.21, "learning_rate": 2.94035883617014e-06, "loss": 0.3824, "step": 89000},
    {"epoch": 7.25, "learning_rate": 2.8977214585394142e-06, "loss": 0.3469, "step": 89500},
    {"epoch": 7.29, "learning_rate": 2.8550840809086883e-06, "loss": 0.3599, "step": 90000},
    {"epoch": 7.33, "learning_rate": 2.8124467032779616e-06, "loss": 0.3332, "step": 90500},
    {"epoch": 7.37, "learning_rate": 2.7698093256472356e-06, "loss": 0.3508, "step": 91000},
    {"epoch": 7.41, "learning_rate": 2.727257222771771e-06, "loss": 0.3382, "step": 91500},
    {"epoch": 7.45, "learning_rate": 2.684619845141045e-06, "loss": 0.3583, "step": 92000},
    {"epoch": 7.49, "learning_rate": 2.641982467510318e-06, "loss": 0.3392, "step": 92500},
    {"epoch": 7.53, "learning_rate": 2.5993450898795923e-06, "loss": 0.3548, "step": 93000},
    {"epoch": 7.57, "learning_rate": 2.556707712248866e-06, "loss": 0.3372, "step": 93500},
    {"epoch": 7.62, "learning_rate": 2.5141556093734016e-06, "loss": 0.3386, "step": 94000},
    {"epoch": 7.66, "learning_rate": 2.471518231742675e-06, "loss": 0.362, "step": 94500},
    {"epoch": 7.7, "learning_rate": 2.428880854111949e-06, "loss": 0.3454, "step": 95000},
    {"epoch": 7.74, "learning_rate": 2.3862434764812226e-06, "loss": 0.3757, "step": 95500},
    {"epoch": 7.78, "learning_rate": 2.3436913736057578e-06, "loss": 0.3785, "step": 96000},
    {"epoch": 7.82, "learning_rate": 2.3010539959750314e-06, "loss": 0.3656, "step": 96500},
    {"epoch": 7.86, "learning_rate": 2.2584166183443055e-06, "loss": 0.36, "step": 97000},
    {"epoch": 7.9, "learning_rate": 2.2157792407135796e-06, "loss": 0.3522, "step": 97500},
    {"epoch": 7.94, "learning_rate": 2.1732271378381144e-06, "loss": 0.3511, "step": 98000},
    {"epoch": 7.98, "learning_rate": 2.130589760207388e-06, "loss": 0.3349, "step": 98500},
    {"epoch": 8.0, "eval_BLEU": 49.57199736076417, "eval_BLEU-Bigram-Precision": 55.58697298661954, "eval_BLEU-Trigram-Precision": 47.143087942269375, "eval_BLEU-Unigram-Precision": 70.68244463454045, "eval_ROUGE-2": 44.4865922111988, "eval_ROUGE-L": 56.71133192128113, "eval_Sacre-Bigram-Precision": 53.98119122257053, "eval_Sacre-Trigram-Precision": 47.064737753449144, "eval_Sacre-Unigram-Precision": 69.11714187564411, "eval_SacreBLEU": 48.47735653922784, "eval_loss": 0.6274014711380005, "eval_runtime": 169.1165, "eval_samples_per_second": 7.314, "eval_steps_per_second": 7.314, "step": 98752},
    {"epoch": 8.02, "learning_rate": 2.087952382576662e-06, "loss": 0.3635, "step": 99000},
    {"epoch": 8.06, "learning_rate": 2.0453150049459363e-06, "loss": 0.3149, "step": 99500},
    {"epoch": 8.1, "learning_rate": 2.00267762731521e-06, "loss": 0.3348, "step": 100000},
    {"epoch": 8.14, "learning_rate": 1.9600402496844836e-06, "loss": 0.3358, "step": 100500},
    {"epoch": 8.18, "learning_rate": 1.9174028720537573e-06, "loss": 0.3405, "step": 101000},
    {"epoch": 8.22, "learning_rate": 1.8747654944230311e-06, "loss": 0.328, "step": 101500},
    {"epoch": 8.26, "learning_rate": 1.8322133915475666e-06, "loss": 0.3487, "step": 102000},
    {"epoch": 8.3, "learning_rate": 1.7895760139168402e-06, "loss": 0.3012, "step": 102500},
    {"epoch": 8.34, "learning_rate": 1.746938636286114e-06, "loss": 0.3456, "step": 103000},
    {"epoch": 8.38, "learning_rate": 1.7043012586553878e-06, "loss": 0.3472, "step": 103500},
    {"epoch": 8.43, "learning_rate": 1.6617491557799232e-06, "loss": 0.3476, "step": 104000},
    {"epoch": 8.47, "learning_rate": 1.6191117781491968e-06, "loss": 0.3262, "step": 104500},
    {"epoch": 8.51, "learning_rate": 1.5765596752737323e-06, "loss": 0.3607, "step": 105000},
    {"epoch": 8.55, "learning_rate": 1.533922297643006e-06, "loss": 0.341, "step": 105500},
    {"epoch": 8.59, "learning_rate": 1.4912849200122798e-06, "loss": 0.3164, "step": 106000},
    {"epoch": 8.63, "learning_rate": 1.4486475423815535e-06, "loss": 0.354, "step": 106500},
    {"epoch": 8.67, "learning_rate": 1.4060101647508274e-06, "loss": 0.3282, "step": 107000},
    {"epoch": 8.71, "learning_rate": 1.363372787120101e-06, "loss": 0.3419, "step": 107500},
    {"epoch": 8.75, "learning_rate": 1.320735409489375e-06, "loss": 0.3524, "step": 108000},
    {"epoch": 8.79, "learning_rate": 1.2780980318586486e-06, "loss": 0.3372, "step": 108500},
    {"epoch": 8.83, "learning_rate": 1.2354606542279224e-06, "loss": 0.358, "step": 109000},
    {"epoch": 8.87, "learning_rate": 1.1929085513524577e-06, "loss": 0.3426, "step": 109500},
    {"epoch": 8.91, "learning_rate": 1.150356448476993e-06, "loss": 0.3449, "step": 110000},
    {"epoch": 8.95, "learning_rate": 1.1077190708462667e-06, "loss": 0.3547, "step": 110500},
    {"epoch": 8.99, "learning_rate": 1.0650816932155406e-06, "loss": 0.347, "step": 111000},
    {"epoch": 9.0, "eval_BLEU": 50.42765483523289, "eval_BLEU-Bigram-Precision": 55.31231468669697, "eval_BLEU-Trigram-Precision": 47.11826938259908, "eval_BLEU-Unigram-Precision": 69.82722488706749, "eval_ROUGE-2": 44.880113409125535, "eval_ROUGE-L": 56.82064108206701, "eval_Sacre-Bigram-Precision": 53.78361795684631, "eval_Sacre-Trigram-Precision": 47.04437530702472, "eval_Sacre-Unigram-Precision": 68.36258716037509, "eval_SacreBLEU": 49.36901330124516, "eval_loss": 0.6147586703300476, "eval_runtime": 169.9129, "eval_samples_per_second": 7.28, "eval_steps_per_second": 7.28, "step": 111096},
    {"epoch": 9.03, "learning_rate": 1.0224443155848143e-06, "loss": 0.3398, "step": 111500},
    {"epoch": 9.07, "learning_rate": 9.798069379540882e-07, "loss": 0.3255, "step": 112000},
    {"epoch": 9.11, "learning_rate": 9.37169560323362e-07, "loss": 0.3178, "step": 112500},
    {"epoch": 9.15, "learning_rate": 8.946174574478971e-07, "loss": 0.3281, "step": 113000},
    {"epoch": 9.19, "learning_rate": 8.520653545724324e-07, "loss": 0.3261, "step": 113500},
    {"epoch": 9.24, "learning_rate": 8.094279769417062e-07, "loss": 0.3275, "step": 114000},
    {"epoch": 9.28, "learning_rate": 7.6679059931098e-07, "loss": 0.3359, "step": 114500},
    {"epoch": 9.32, "learning_rate": 7.241532216802538e-07, "loss": 0.3165, "step": 115000},
    {"epoch": 9.36, "learning_rate": 6.815158440495275e-07, "loss": 0.3199, "step": 115500},
    {"epoch": 9.4, "learning_rate": 6.388784664188015e-07, "loss": 0.332, "step": 116000},
    {"epoch": 9.44, "learning_rate": 5.962410887880752e-07, "loss": 0.3227, "step": 116500},
    {"epoch": 9.48, "learning_rate": 5.536037111573491e-07, "loss": 0.3203, "step": 117000},
    {"epoch": 9.52, "learning_rate": 5.109663335266228e-07, "loss": 0.347, "step": 117500},
    {"epoch": 9.56, "learning_rate": 4.6832895589589667e-07, "loss": 0.3485, "step": 118000},
    {"epoch": 9.6, "learning_rate": 4.2569157826517044e-07, "loss": 0.3244, "step": 118500},
    {"epoch": 9.64, "learning_rate": 3.830542006344442e-07, "loss": 0.3351, "step": 119000},
    {"epoch": 9.68, "learning_rate": 3.4041682300371804e-07, "loss": 0.3341, "step": 119500},
    {"epoch": 9.72, "learning_rate": 2.9786472012825325e-07, "loss": 0.328, "step": 120000},
    {"epoch": 9.76, "learning_rate": 2.5522734249752707e-07, "loss": 0.3447, "step": 120500},
    {"epoch": 9.8, "learning_rate": 2.1258996486680084e-07, "loss": 0.3326, "step": 121000},
    {"epoch": 9.84, "learning_rate": 1.6995258723607467e-07, "loss": 0.3259, "step": 121500},
    {"epoch": 9.88, "learning_rate": 1.274004843606099e-07, "loss": 0.3355, "step": 122000},
    {"epoch": 9.92, "learning_rate": 8.47631067298837e-08, "loss": 0.3653, "step": 122500},
    {"epoch": 9.96, "learning_rate": 4.212572909915749e-08, "loss": 0.3359, "step": 123000},
    {"epoch": 10.0, "eval_BLEU": 50.8713809636162, "eval_BLEU-Bigram-Precision": 55.25529935869192, "eval_BLEU-Trigram-Precision": 47.15997915581032, "eval_BLEU-Unigram-Precision": 69.56702363367799, "eval_ROUGE-2": 45.59251780348448, "eval_ROUGE-L": 57.14390377702154, "eval_Sacre-Bigram-Precision": 53.736780853109344, "eval_Sacre-Trigram-Precision": 47.052790672568285, "eval_Sacre-Unigram-Precision": 68.06666666666666, "eval_SacreBLEU": 49.82287256259556, "eval_loss": 0.6102247834205627, "eval_runtime": 172.6687, "eval_samples_per_second": 7.164, "eval_steps_per_second": 7.164, "step": 123440},
    {"epoch": 10.0, "step": 123440, "total_flos": 3771355808563200.0, "train_loss": 0.4878676912032082, "train_runtime": 11854.8374, "train_samples_per_second": 20.824, "train_steps_per_second": 10.413}
  ],
  "max_steps": 123440,
  "num_train_epochs": 10,
  "total_flos": 3771355808563200.0,
  "trial_name": null,
  "trial_params": null
}