|
{ |
|
"best_metric": 0.4481608271598816, |
|
"best_model_checkpoint": "./distilbert-marian-training-1/checkpoint-123440", |
|
"epoch": 10.0, |
|
"global_step": 123440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.068697342838627e-07, |
|
"loss": 0.5266, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.6169799092676605e-06, |
|
"loss": 0.497, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.4270900842514583e-06, |
|
"loss": 0.5398, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.2372002592352565e-06, |
|
"loss": 0.5439, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.047310434219054e-06, |
|
"loss": 0.5357, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.857420609202852e-06, |
|
"loss": 0.5693, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5.66753078418665e-06, |
|
"loss": 0.5311, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.477640959170448e-06, |
|
"loss": 0.5433, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.286130913804278e-06, |
|
"loss": 0.5403, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.094620868438109e-06, |
|
"loss": 0.5478, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.904731043421905e-06, |
|
"loss": 0.5556, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.714841218405703e-06, |
|
"loss": 0.5835, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.972370979295291e-06, |
|
"loss": 0.5578, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.929733601664564e-06, |
|
"loss": 0.5389, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.887181498789099e-06, |
|
"loss": 0.5645, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.844544121158372e-06, |
|
"loss": 0.5672, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.801906743527647e-06, |
|
"loss": 0.559, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.759354640652181e-06, |
|
"loss": 0.5927, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.716717263021456e-06, |
|
"loss": 0.5241, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.67407988539073e-06, |
|
"loss": 0.5458, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.631442507760004e-06, |
|
"loss": 0.5902, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.588805130129278e-06, |
|
"loss": 0.5425, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.546167752498551e-06, |
|
"loss": 0.5225, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.503615649623087e-06, |
|
"loss": 0.5301, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_BLEU": 39.52568484782476, |
|
"eval_BLEU-Bigram-Precision": 44.566237100676446, |
|
"eval_BLEU-Trigram-Precision": 34.507782919646615, |
|
"eval_BLEU-Unigram-Precision": 63.26198231735691, |
|
"eval_ROUGE-2": 33.35721035298087, |
|
"eval_ROUGE-L": 47.29001112126434, |
|
"eval_Sacre-Bigram-Precision": 42.86996462699963, |
|
"eval_Sacre-Trigram-Precision": 34.29733393583371, |
|
"eval_Sacre-Unigram-Precision": 61.48775894538606, |
|
"eval_SacreBLEU": 38.73636411085535, |
|
"eval_loss": 0.7005472183227539, |
|
"eval_runtime": 848.4996, |
|
"eval_samples_per_second": 1.458, |
|
"eval_steps_per_second": 1.458, |
|
"step": 12344 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.46097827199236e-06, |
|
"loss": 0.5475, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.418340894361634e-06, |
|
"loss": 0.5107, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.375703516730907e-06, |
|
"loss": 0.5036, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.333066139100182e-06, |
|
"loss": 0.4865, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.290428761469455e-06, |
|
"loss": 0.5065, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.247791383838728e-06, |
|
"loss": 0.5159, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 9.205154006208003e-06, |
|
"loss": 0.5221, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.16260190333254e-06, |
|
"loss": 0.4597, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.119964525701812e-06, |
|
"loss": 0.5024, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.077327148071086e-06, |
|
"loss": 0.4852, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 9.034689770440359e-06, |
|
"loss": 0.4887, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.992052392809634e-06, |
|
"loss": 0.4796, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.949415015178907e-06, |
|
"loss": 0.4754, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 8.90677763754818e-06, |
|
"loss": 0.5062, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 8.864140259917455e-06, |
|
"loss": 0.5204, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.821502882286729e-06, |
|
"loss": 0.4799, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.778950779411265e-06, |
|
"loss": 0.4666, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.736313401780538e-06, |
|
"loss": 0.4484, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.693676024149813e-06, |
|
"loss": 0.4721, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.651038646519086e-06, |
|
"loss": 0.4867, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 8.60840126888836e-06, |
|
"loss": 0.47, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 8.565763891257632e-06, |
|
"loss": 0.4848, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.523126513626907e-06, |
|
"loss": 0.4429, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.480574410751442e-06, |
|
"loss": 0.4501, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 8.437937033120715e-06, |
|
"loss": 0.4363, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_BLEU": 44.07579241900836, |
|
"eval_BLEU-Bigram-Precision": 48.638266436838215, |
|
"eval_BLEU-Trigram-Precision": 39.453534717851895, |
|
"eval_BLEU-Unigram-Precision": 65.81561600594188, |
|
"eval_ROUGE-2": 37.93490065934162, |
|
"eval_ROUGE-L": 51.37484166047172, |
|
"eval_Sacre-Bigram-Precision": 47.097215642928575, |
|
"eval_Sacre-Trigram-Precision": 39.10595653642608, |
|
"eval_Sacre-Unigram-Precision": 64.20241154378336, |
|
"eval_SacreBLEU": 43.28181370216754, |
|
"eval_loss": 0.6306515336036682, |
|
"eval_runtime": 969.349, |
|
"eval_samples_per_second": 1.276, |
|
"eval_steps_per_second": 1.276, |
|
"step": 24688 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 8.395299655489988e-06, |
|
"loss": 0.4153, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 8.352662277859263e-06, |
|
"loss": 0.4241, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.310024900228536e-06, |
|
"loss": 0.401, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 8.267387522597811e-06, |
|
"loss": 0.4351, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.224750144967085e-06, |
|
"loss": 0.4075, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.18211276733636e-06, |
|
"loss": 0.4106, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.139560664460894e-06, |
|
"loss": 0.4175, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.096923286830167e-06, |
|
"loss": 0.3897, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 8.05428590919944e-06, |
|
"loss": 0.4152, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 8.011648531568715e-06, |
|
"loss": 0.3917, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.96909642869325e-06, |
|
"loss": 0.4081, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.926459051062525e-06, |
|
"loss": 0.3964, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 7.883821673431798e-06, |
|
"loss": 0.3833, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.841184295801073e-06, |
|
"loss": 0.3884, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.798546918170346e-06, |
|
"loss": 0.4027, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.75590954053962e-06, |
|
"loss": 0.4272, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.713272162908894e-06, |
|
"loss": 0.3868, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 7.670634785278167e-06, |
|
"loss": 0.3848, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 7.628082682402702e-06, |
|
"loss": 0.3919, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.585445304771976e-06, |
|
"loss": 0.3803, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 7.54280792714125e-06, |
|
"loss": 0.3986, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.500170549510524e-06, |
|
"loss": 0.3722, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.4576184466350585e-06, |
|
"loss": 0.3795, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.4149810690043326e-06, |
|
"loss": 0.3768, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.372343691373607e-06, |
|
"loss": 0.3983, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_BLEU": 47.64065251911936, |
|
"eval_BLEU-Bigram-Precision": 52.275310044962694, |
|
"eval_BLEU-Trigram-Precision": 43.41648247552889, |
|
"eval_BLEU-Unigram-Precision": 68.46246973365618, |
|
"eval_ROUGE-2": 41.91844760109249, |
|
"eval_ROUGE-L": 54.326234865452385, |
|
"eval_Sacre-Bigram-Precision": 50.88183888565224, |
|
"eval_Sacre-Trigram-Precision": 43.238494672409885, |
|
"eval_Sacre-Unigram-Precision": 67.02455512476389, |
|
"eval_SacreBLEU": 46.828668155215276, |
|
"eval_loss": 0.5844058990478516, |
|
"eval_runtime": 950.5601, |
|
"eval_samples_per_second": 1.301, |
|
"eval_steps_per_second": 1.301, |
|
"step": 37032 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 7.329706313742881e-06, |
|
"loss": 0.3116, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 7.287154210867415e-06, |
|
"loss": 0.3262, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.244516833236689e-06, |
|
"loss": 0.3429, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.201879455605963e-06, |
|
"loss": 0.3415, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.159242077975237e-06, |
|
"loss": 0.3407, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 7.116689975099772e-06, |
|
"loss": 0.3288, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 7.074052597469046e-06, |
|
"loss": 0.3499, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 7.03141521983832e-06, |
|
"loss": 0.3396, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.988863116962855e-06, |
|
"loss": 0.3395, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.946225739332128e-06, |
|
"loss": 0.3462, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 6.9035883617014025e-06, |
|
"loss": 0.3362, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.8609509840706765e-06, |
|
"loss": 0.3284, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 6.818313606439951e-06, |
|
"loss": 0.3446, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 6.775676228809224e-06, |
|
"loss": 0.36, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 6.733038851178497e-06, |
|
"loss": 0.3368, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 6.690401473547772e-06, |
|
"loss": 0.3362, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 6.647764095917045e-06, |
|
"loss": 0.338, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 6.6052119930415805e-06, |
|
"loss": 0.3206, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.562659890166116e-06, |
|
"loss": 0.339, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 6.52002251253539e-06, |
|
"loss": 0.3226, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 6.477385134904664e-06, |
|
"loss": 0.3386, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 6.434747757273937e-06, |
|
"loss": 0.3406, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 6.39211037964321e-06, |
|
"loss": 0.3231, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.349473002012485e-06, |
|
"loss": 0.3418, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_BLEU": 51.32592897573737, |
|
"eval_BLEU-Bigram-Precision": 55.485615933120236, |
|
"eval_BLEU-Trigram-Precision": 47.36897219749725, |
|
"eval_BLEU-Unigram-Precision": 70.36899684776562, |
|
"eval_ROUGE-2": 45.802241938436794, |
|
"eval_ROUGE-L": 57.842922925143036, |
|
"eval_Sacre-Bigram-Precision": 54.30648474951272, |
|
"eval_Sacre-Trigram-Precision": 47.28686538569899, |
|
"eval_Sacre-Unigram-Precision": 69.06033630069238, |
|
"eval_SacreBLEU": 50.59726409455209, |
|
"eval_loss": 0.5526171326637268, |
|
"eval_runtime": 916.9598, |
|
"eval_samples_per_second": 1.349, |
|
"eval_steps_per_second": 1.349, |
|
"step": 49376 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.3069208991370205e-06, |
|
"loss": 0.326, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 6.264283521506294e-06, |
|
"loss": 0.2931, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 6.221646143875567e-06, |
|
"loss": 0.2846, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 6.179008766244842e-06, |
|
"loss": 0.2896, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 6.136371388614115e-06, |
|
"loss": 0.2961, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 6.0937340109833884e-06, |
|
"loss": 0.2679, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 6.051096633352663e-06, |
|
"loss": 0.3005, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 6.0085445304771986e-06, |
|
"loss": 0.2921, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 5.965907152846472e-06, |
|
"loss": 0.2871, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 5.923269775215745e-06, |
|
"loss": 0.2853, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 5.88063239758502e-06, |
|
"loss": 0.2759, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 5.837995019954293e-06, |
|
"loss": 0.2793, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 5.7953576423235665e-06, |
|
"loss": 0.2924, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 5.7527202646928406e-06, |
|
"loss": 0.3162, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 5.710168161817377e-06, |
|
"loss": 0.2815, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 5.66753078418665e-06, |
|
"loss": 0.308, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 5.624893406555923e-06, |
|
"loss": 0.2984, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 5.582256028925197e-06, |
|
"loss": 0.2928, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.539618651294471e-06, |
|
"loss": 0.2903, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 5.496981273663745e-06, |
|
"loss": 0.2971, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 5.454343896033019e-06, |
|
"loss": 0.2815, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 5.411706518402292e-06, |
|
"loss": 0.3028, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 5.369154415526828e-06, |
|
"loss": 0.2931, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.326517037896102e-06, |
|
"loss": 0.2859, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 5.283879660265375e-06, |
|
"loss": 0.2838, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_BLEU": 53.29609495182536, |
|
"eval_BLEU-Bigram-Precision": 57.167285518858705, |
|
"eval_BLEU-Trigram-Precision": 49.0354115750611, |
|
"eval_BLEU-Unigram-Precision": 71.76226675881134, |
|
"eval_ROUGE-2": 48.21696209970032, |
|
"eval_ROUGE-L": 59.61148640581645, |
|
"eval_Sacre-Bigram-Precision": 56.00606885005755, |
|
"eval_Sacre-Trigram-Precision": 48.951166303070984, |
|
"eval_Sacre-Unigram-Precision": 70.51250552798388, |
|
"eval_SacreBLEU": 52.60559213285124, |
|
"eval_loss": 0.5165597796440125, |
|
"eval_runtime": 915.3947, |
|
"eval_samples_per_second": 1.351, |
|
"eval_steps_per_second": 1.351, |
|
"step": 61720 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 5.2412422826346485e-06, |
|
"loss": 0.2663, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 5.1986901797591846e-06, |
|
"loss": 0.25, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 5.156052802128459e-06, |
|
"loss": 0.2474, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 5.113415424497732e-06, |
|
"loss": 0.2469, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 5.070778046867005e-06, |
|
"loss": 0.277, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 5.02814066923628e-06, |
|
"loss": 0.2478, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 4.985588566360815e-06, |
|
"loss": 0.2593, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 4.9429511887300885e-06, |
|
"loss": 0.2565, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 4.900313811099363e-06, |
|
"loss": 0.2678, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 4.857676433468637e-06, |
|
"loss": 0.2434, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.815124330593172e-06, |
|
"loss": 0.2776, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 4.772486952962445e-06, |
|
"loss": 0.2678, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 4.729849575331719e-06, |
|
"loss": 0.2784, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 4.687212197700993e-06, |
|
"loss": 0.2404, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.6445748200702666e-06, |
|
"loss": 0.26, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 4.601937442439541e-06, |
|
"loss": 0.242, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.559300064808814e-06, |
|
"loss": 0.2636, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 4.516662687178088e-06, |
|
"loss": 0.2703, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 4.474110584302623e-06, |
|
"loss": 0.2457, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 4.431473206671897e-06, |
|
"loss": 0.2492, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 4.3888358290411705e-06, |
|
"loss": 0.2677, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 4.346198451410445e-06, |
|
"loss": 0.2557, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 4.303561073779719e-06, |
|
"loss": 0.262, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 4.261008970904254e-06, |
|
"loss": 0.247, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 4.218371593273527e-06, |
|
"loss": 0.2461, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_BLEU": 56.19871507978884, |
|
"eval_BLEU-Bigram-Precision": 58.651824957003626, |
|
"eval_BLEU-Trigram-Precision": 51.109418634171114, |
|
"eval_BLEU-Unigram-Precision": 72.28111326627273, |
|
"eval_ROUGE-2": 50.26575052573452, |
|
"eval_ROUGE-L": 61.216031502511356, |
|
"eval_Sacre-Bigram-Precision": 57.60808534531162, |
|
"eval_Sacre-Trigram-Precision": 51.07878391631252, |
|
"eval_Sacre-Unigram-Precision": 71.06299212598425, |
|
"eval_SacreBLEU": 55.641908183294326, |
|
"eval_loss": 0.49508827924728394, |
|
"eval_runtime": 1114.4052, |
|
"eval_samples_per_second": 1.11, |
|
"eval_steps_per_second": 1.11, |
|
"step": 74064 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 4.175734215642801e-06, |
|
"loss": 0.2288, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 4.133096838012075e-06, |
|
"loss": 0.2143, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 4.0904594603813494e-06, |
|
"loss": 0.2089, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 4.047907357505884e-06, |
|
"loss": 0.2401, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 4.00535525463042e-06, |
|
"loss": 0.2384, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.962717876999693e-06, |
|
"loss": 0.2268, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 3.920080499368967e-06, |
|
"loss": 0.2197, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 3.87744312173824e-06, |
|
"loss": 0.2373, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 3.8348910188627765e-06, |
|
"loss": 0.2261, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.7922536412320497e-06, |
|
"loss": 0.2299, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 3.749616263601324e-06, |
|
"loss": 0.2267, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 3.7069788859705975e-06, |
|
"loss": 0.2196, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.6643415083398716e-06, |
|
"loss": 0.2192, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 3.621704130709145e-06, |
|
"loss": 0.2365, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.579066753078419e-06, |
|
"loss": 0.2187, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 3.5364293754476926e-06, |
|
"loss": 0.242, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 3.493877272572228e-06, |
|
"loss": 0.2483, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 3.4512398949415014e-06, |
|
"loss": 0.2399, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 3.4086025173107755e-06, |
|
"loss": 0.2409, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3.365965139680049e-06, |
|
"loss": 0.242, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 3.323413036804585e-06, |
|
"loss": 0.2484, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 3.280775659173858e-06, |
|
"loss": 0.2562, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 3.238138281543132e-06, |
|
"loss": 0.2283, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 3.1955009039124062e-06, |
|
"loss": 0.236, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_BLEU": 57.78119869959703, |
|
"eval_BLEU-Bigram-Precision": 61.05482573076737, |
|
"eval_BLEU-Trigram-Precision": 53.63036303630363, |
|
"eval_BLEU-Unigram-Precision": 74.23397054788255, |
|
"eval_ROUGE-2": 52.1883256479329, |
|
"eval_ROUGE-L": 63.12941898321425, |
|
"eval_Sacre-Bigram-Precision": 60.065305276251685, |
|
"eval_Sacre-Trigram-Precision": 53.55263886581381, |
|
"eval_Sacre-Unigram-Precision": 73.0894744532658, |
|
"eval_SacreBLEU": 57.19500795208099, |
|
"eval_loss": 0.4768661558628082, |
|
"eval_runtime": 1205.3601, |
|
"eval_samples_per_second": 1.026, |
|
"eval_steps_per_second": 1.026, |
|
"step": 86408 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.1529488010369415e-06, |
|
"loss": 0.2257, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 3.1103114234062147e-06, |
|
"loss": 0.2245, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 3.0676740457754888e-06, |
|
"loss": 0.215, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 3.025036668144763e-06, |
|
"loss": 0.2102, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 2.982484565269298e-06, |
|
"loss": 0.211, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 2.9398471876385713e-06, |
|
"loss": 0.236, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 2.8972098100078454e-06, |
|
"loss": 0.2069, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 2.8545724323771195e-06, |
|
"loss": 0.2169, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 2.8120203295016547e-06, |
|
"loss": 0.209, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 2.769382951870928e-06, |
|
"loss": 0.2195, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 2.726745574240202e-06, |
|
"loss": 0.2074, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 2.6841934713647373e-06, |
|
"loss": 0.2112, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 2.6415560937340113e-06, |
|
"loss": 0.2145, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 2.5989187161032846e-06, |
|
"loss": 0.213, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 2.5562813384725587e-06, |
|
"loss": 0.1966, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 2.5136439608418328e-06, |
|
"loss": 0.2097, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 2.4710065832111064e-06, |
|
"loss": 0.2167, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 2.42836920558038e-06, |
|
"loss": 0.2144, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 2.3858171027049153e-06, |
|
"loss": 0.2277, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 2.343179725074189e-06, |
|
"loss": 0.2347, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 2.300542347443463e-06, |
|
"loss": 0.2235, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 2.2579049698127367e-06, |
|
"loss": 0.2197, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 2.2152675921820104e-06, |
|
"loss": 0.2219, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 2.1726302145512845e-06, |
|
"loss": 0.21, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 2.129992836920558e-06, |
|
"loss": 0.2014, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_BLEU": 59.598575339554586, |
|
"eval_BLEU-Bigram-Precision": 63.173579559333625, |
|
"eval_BLEU-Trigram-Precision": 56.19508137056102, |
|
"eval_BLEU-Unigram-Precision": 75.73942688657515, |
|
"eval_ROUGE-2": 54.010901558700496, |
|
"eval_ROUGE-L": 64.55563198822112, |
|
"eval_Sacre-Bigram-Precision": 62.145801805562805, |
|
"eval_Sacre-Trigram-Precision": 56.11089418196017, |
|
"eval_Sacre-Unigram-Precision": 74.65196078431373, |
|
"eval_SacreBLEU": 59.05906731573519, |
|
"eval_loss": 0.4607221782207489, |
|
"eval_runtime": 1200.6288, |
|
"eval_samples_per_second": 1.03, |
|
"eval_steps_per_second": 1.03, |
|
"step": 98752 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 2.0873554592898322e-06, |
|
"loss": 0.2166, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 2.0448033564143674e-06, |
|
"loss": 0.1819, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 2.002165978783641e-06, |
|
"loss": 0.1905, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 1.9595286011529148e-06, |
|
"loss": 0.2006, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 1.916891223522189e-06, |
|
"loss": 0.2078, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 1.8742538458914625e-06, |
|
"loss": 0.2051, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 1.8317017430159977e-06, |
|
"loss": 0.207, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 1.7890643653852716e-06, |
|
"loss": 0.1757, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 1.7464269877545453e-06, |
|
"loss": 0.2117, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 1.7037896101238192e-06, |
|
"loss": 0.2073, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 1.6611522324930928e-06, |
|
"loss": 0.2056, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 1.6186001296176283e-06, |
|
"loss": 0.1992, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 1.575962751986902e-06, |
|
"loss": 0.2169, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 1.5333253743561758e-06, |
|
"loss": 0.2039, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 1.4906879967254495e-06, |
|
"loss": 0.1897, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 1.4480506190947233e-06, |
|
"loss": 0.2109, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 1.4054985162192586e-06, |
|
"loss": 0.2002, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 1.3628611385885324e-06, |
|
"loss": 0.2071, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 1.3203090357130676e-06, |
|
"loss": 0.2247, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 1.2776716580823415e-06, |
|
"loss": 0.2092, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 1.2350342804516152e-06, |
|
"loss": 0.2184, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 1.1924821775761504e-06, |
|
"loss": 0.2012, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 1.1498447999454243e-06, |
|
"loss": 0.2024, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 1.1072074223146981e-06, |
|
"loss": 0.2178, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 1.0645700446839718e-06, |
|
"loss": 0.2087, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_BLEU": 59.228700736361574, |
|
"eval_BLEU-Bigram-Precision": 62.02898550724638, |
|
"eval_BLEU-Trigram-Precision": 55.13255240443896, |
|
"eval_BLEU-Unigram-Precision": 74.83247481424078, |
|
"eval_ROUGE-2": 53.99481361299642, |
|
"eval_ROUGE-L": 64.48941872388136, |
|
"eval_Sacre-Bigram-Precision": 61.05263157894737, |
|
"eval_Sacre-Trigram-Precision": 55.05952380952381, |
|
"eval_Sacre-Unigram-Precision": 73.72556628025416, |
|
"eval_SacreBLEU": 58.70218229892996, |
|
"eval_loss": 0.4529183506965637, |
|
"eval_runtime": 1120.8668, |
|
"eval_samples_per_second": 1.104, |
|
"eval_steps_per_second": 1.104, |
|
"step": 111096 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 1.0219326670532457e-06, |
|
"loss": 0.2001, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 9.792952894225194e-07, |
|
"loss": 0.1909, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 9.366579117917933e-07, |
|
"loss": 0.1929, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 8.940205341610671e-07, |
|
"loss": 0.1847, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 8.514684312856022e-07, |
|
"loss": 0.1883, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 8.08831053654876e-07, |
|
"loss": 0.1995, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 7.662789507794113e-07, |
|
"loss": 0.2046, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 7.236415731486851e-07, |
|
"loss": 0.1945, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 6.810041955179588e-07, |
|
"loss": 0.2007, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 6.383668178872326e-07, |
|
"loss": 0.1966, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 5.957294402565065e-07, |
|
"loss": 0.1924, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 5.530920626257804e-07, |
|
"loss": 0.1974, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5.104546849950541e-07, |
|
"loss": 0.2053, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 4.6781730736432786e-07, |
|
"loss": 0.2116, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.251799297336017e-07, |
|
"loss": 0.2003, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 3.8254255210287546e-07, |
|
"loss": 0.1966, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 3.3990517447214934e-07, |
|
"loss": 0.1966, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 2.9735307159668455e-07, |
|
"loss": 0.2016, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 2.547156939659583e-07, |
|
"loss": 0.2098, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 2.1207831633523215e-07, |
|
"loss": 0.2038, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 1.6944093870450592e-07, |
|
"loss": 0.1956, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 1.2680356107377972e-07, |
|
"loss": 0.2036, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 8.416618344305353e-08, |
|
"loss": 0.22, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 4.1528805812327316e-08, |
|
"loss": 0.2003, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_BLEU": 60.284861769008316, |
|
"eval_BLEU-Bigram-Precision": 62.98832834957076, |
|
"eval_BLEU-Trigram-Precision": 56.25192327418197, |
|
"eval_BLEU-Unigram-Precision": 75.38118428838013, |
|
"eval_ROUGE-2": 54.858141016126524, |
|
"eval_ROUGE-L": 65.28596759853026, |
|
"eval_Sacre-Bigram-Precision": 62.07553975369712, |
|
"eval_Sacre-Trigram-Precision": 56.22145176379946, |
|
"eval_Sacre-Unigram-Precision": 74.30246076341794, |
|
"eval_SacreBLEU": 59.77673945128229, |
|
"eval_loss": 0.4481608271598816, |
|
"eval_runtime": 1123.8109, |
|
"eval_samples_per_second": 1.101, |
|
"eval_steps_per_second": 1.101, |
|
"step": 123440 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 123440, |
|
"total_flos": 3230087590379520.0, |
|
"train_loss": 0.31656123802249336, |
|
"train_runtime": 24698.2128, |
|
"train_samples_per_second": 9.995, |
|
"train_steps_per_second": 4.998 |
|
} |
|
], |
|
"max_steps": 123440, |
|
"num_train_epochs": 10, |
|
"total_flos": 3230087590379520.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|