LUKE-Marian-Model-on-CoNaLa / trainer_state.json
AhmedSSoliman's picture
Upload trainer_state.json
9d9026f
raw
history blame
37.7 kB
{
"best_metric": 0.6102247834205627,
"best_model_checkpoint": "./luke-marian-training-1/checkpoint-123440",
"epoch": 10.0,
"global_step": 123440,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 8.052495139338951e-07,
"loss": 0.7856,
"step": 500
},
{
"epoch": 0.08,
"learning_rate": 1.6153596889176928e-06,
"loss": 0.748,
"step": 1000
},
{
"epoch": 0.12,
"learning_rate": 2.4254698639014908e-06,
"loss": 0.7809,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 3.2355800388852886e-06,
"loss": 0.7923,
"step": 2000
},
{
"epoch": 0.2,
"learning_rate": 4.044069993519119e-06,
"loss": 0.7799,
"step": 2500
},
{
"epoch": 0.24,
"learning_rate": 4.854180168502917e-06,
"loss": 0.8085,
"step": 3000
},
{
"epoch": 0.28,
"learning_rate": 5.664290343486715e-06,
"loss": 0.7759,
"step": 3500
},
{
"epoch": 0.32,
"learning_rate": 6.474400518470513e-06,
"loss": 0.7833,
"step": 4000
},
{
"epoch": 0.36,
"learning_rate": 7.2828904731043424e-06,
"loss": 0.7804,
"step": 4500
},
{
"epoch": 0.41,
"learning_rate": 8.09300064808814e-06,
"loss": 0.8169,
"step": 5000
},
{
"epoch": 0.45,
"learning_rate": 8.903110823071939e-06,
"loss": 0.8058,
"step": 5500
},
{
"epoch": 0.49,
"learning_rate": 9.713220998055737e-06,
"loss": 0.8265,
"step": 6000
},
{
"epoch": 0.53,
"learning_rate": 9.972541528805813e-06,
"loss": 0.7904,
"step": 6500
},
{
"epoch": 0.57,
"learning_rate": 9.929904151175087e-06,
"loss": 0.7829,
"step": 7000
},
{
"epoch": 0.61,
"learning_rate": 9.88726677354436e-06,
"loss": 0.8126,
"step": 7500
},
{
"epoch": 0.65,
"learning_rate": 9.844629395913635e-06,
"loss": 0.8133,
"step": 8000
},
{
"epoch": 0.69,
"learning_rate": 9.802077293038171e-06,
"loss": 0.7928,
"step": 8500
},
{
"epoch": 0.73,
"learning_rate": 9.759525190162705e-06,
"loss": 0.8564,
"step": 9000
},
{
"epoch": 0.77,
"learning_rate": 9.716887812531979e-06,
"loss": 0.7624,
"step": 9500
},
{
"epoch": 0.81,
"learning_rate": 9.674250434901252e-06,
"loss": 0.7662,
"step": 10000
},
{
"epoch": 0.85,
"learning_rate": 9.631613057270527e-06,
"loss": 0.8363,
"step": 10500
},
{
"epoch": 0.89,
"learning_rate": 9.5889756796398e-06,
"loss": 0.7793,
"step": 11000
},
{
"epoch": 0.93,
"learning_rate": 9.546338302009073e-06,
"loss": 0.7561,
"step": 11500
},
{
"epoch": 0.97,
"learning_rate": 9.50378619913361e-06,
"loss": 0.774,
"step": 12000
},
{
"epoch": 1.0,
"eval_BLEU": 32.0811169920738,
"eval_BLEU-Bigram-Precision": 37.1092045950269,
"eval_BLEU-Trigram-Precision": 26.487573476332887,
"eval_BLEU-Unigram-Precision": 56.60325589903054,
"eval_ROUGE-2": 25.923225512351976,
"eval_ROUGE-L": 40.443599801980355,
"eval_Sacre-Bigram-Precision": 34.86367988711385,
"eval_Sacre-Trigram-Precision": 26.20122541115769,
"eval_Sacre-Unigram-Precision": 54.42125237191651,
"eval_SacreBLEU": 30.767354925943692,
"eval_loss": 0.9131098985671997,
"eval_runtime": 174.9979,
"eval_samples_per_second": 7.069,
"eval_steps_per_second": 7.069,
"step": 12344
},
{
"epoch": 1.01,
"learning_rate": 9.461148821502884e-06,
"loss": 0.7846,
"step": 12500
},
{
"epoch": 1.05,
"learning_rate": 9.418511443872157e-06,
"loss": 0.7368,
"step": 13000
},
{
"epoch": 1.09,
"learning_rate": 9.37587406624143e-06,
"loss": 0.7279,
"step": 13500
},
{
"epoch": 1.13,
"learning_rate": 9.333236688610704e-06,
"loss": 0.7253,
"step": 14000
},
{
"epoch": 1.17,
"learning_rate": 9.290599310979979e-06,
"loss": 0.7259,
"step": 14500
},
{
"epoch": 1.22,
"learning_rate": 9.247961933349252e-06,
"loss": 0.7432,
"step": 15000
},
{
"epoch": 1.26,
"learning_rate": 9.205324555718525e-06,
"loss": 0.7437,
"step": 15500
},
{
"epoch": 1.3,
"learning_rate": 9.1626871780878e-06,
"loss": 0.6641,
"step": 16000
},
{
"epoch": 1.34,
"learning_rate": 9.120135075212335e-06,
"loss": 0.7239,
"step": 16500
},
{
"epoch": 1.38,
"learning_rate": 9.077497697581608e-06,
"loss": 0.7007,
"step": 17000
},
{
"epoch": 1.42,
"learning_rate": 9.034860319950883e-06,
"loss": 0.7107,
"step": 17500
},
{
"epoch": 1.46,
"learning_rate": 8.992222942320156e-06,
"loss": 0.7033,
"step": 18000
},
{
"epoch": 1.5,
"learning_rate": 8.949670839444692e-06,
"loss": 0.6932,
"step": 18500
},
{
"epoch": 1.54,
"learning_rate": 8.907118736569226e-06,
"loss": 0.7351,
"step": 19000
},
{
"epoch": 1.58,
"learning_rate": 8.8644813589385e-06,
"loss": 0.7511,
"step": 19500
},
{
"epoch": 1.62,
"learning_rate": 8.821843981307775e-06,
"loss": 0.6994,
"step": 20000
},
{
"epoch": 1.66,
"learning_rate": 8.779206603677048e-06,
"loss": 0.6792,
"step": 20500
},
{
"epoch": 1.7,
"learning_rate": 8.736569226046321e-06,
"loss": 0.6701,
"step": 21000
},
{
"epoch": 1.74,
"learning_rate": 8.693931848415596e-06,
"loss": 0.6885,
"step": 21500
},
{
"epoch": 1.78,
"learning_rate": 8.65129447078487e-06,
"loss": 0.6796,
"step": 22000
},
{
"epoch": 1.82,
"learning_rate": 8.608657093154144e-06,
"loss": 0.6812,
"step": 22500
},
{
"epoch": 1.86,
"learning_rate": 8.566104990278679e-06,
"loss": 0.704,
"step": 23000
},
{
"epoch": 1.9,
"learning_rate": 8.523467612647952e-06,
"loss": 0.6443,
"step": 23500
},
{
"epoch": 1.94,
"learning_rate": 8.480830235017227e-06,
"loss": 0.6513,
"step": 24000
},
{
"epoch": 1.98,
"learning_rate": 8.4381928573865e-06,
"loss": 0.6532,
"step": 24500
},
{
"epoch": 2.0,
"eval_BLEU": 34.14564345469897,
"eval_BLEU-Bigram-Precision": 42.337954289996254,
"eval_BLEU-Trigram-Precision": 32.169876203576344,
"eval_BLEU-Unigram-Precision": 61.27510040160643,
"eval_ROUGE-2": 28.971835230430752,
"eval_ROUGE-L": 43.230417531342944,
"eval_Sacre-Bigram-Precision": 40.19043401240035,
"eval_Sacre-Trigram-Precision": 31.907054139181078,
"eval_Sacre-Unigram-Precision": 59.04875395057251,
"eval_SacreBLEU": 33.00279853845508,
"eval_loss": 0.8338403701782227,
"eval_runtime": 167.0515,
"eval_samples_per_second": 7.405,
"eval_steps_per_second": 7.405,
"step": 24688
},
{
"epoch": 2.03,
"learning_rate": 8.395555479755773e-06,
"loss": 0.6209,
"step": 25000
},
{
"epoch": 2.07,
"learning_rate": 8.35300337688031e-06,
"loss": 0.6246,
"step": 25500
},
{
"epoch": 2.11,
"learning_rate": 8.310365999249583e-06,
"loss": 0.6061,
"step": 26000
},
{
"epoch": 2.15,
"learning_rate": 8.267728621618858e-06,
"loss": 0.6347,
"step": 26500
},
{
"epoch": 2.19,
"learning_rate": 8.22509124398813e-06,
"loss": 0.609,
"step": 27000
},
{
"epoch": 2.23,
"learning_rate": 8.182539141112665e-06,
"loss": 0.6059,
"step": 27500
},
{
"epoch": 2.27,
"learning_rate": 8.13990176348194e-06,
"loss": 0.6301,
"step": 28000
},
{
"epoch": 2.31,
"learning_rate": 8.097264385851213e-06,
"loss": 0.5859,
"step": 28500
},
{
"epoch": 2.35,
"learning_rate": 8.054627008220487e-06,
"loss": 0.6133,
"step": 29000
},
{
"epoch": 2.39,
"learning_rate": 8.012074905345023e-06,
"loss": 0.5813,
"step": 29500
},
{
"epoch": 2.43,
"learning_rate": 7.969437527714296e-06,
"loss": 0.6049,
"step": 30000
},
{
"epoch": 2.47,
"learning_rate": 7.92680015008357e-06,
"loss": 0.5982,
"step": 30500
},
{
"epoch": 2.51,
"learning_rate": 7.884162772452844e-06,
"loss": 0.5807,
"step": 31000
},
{
"epoch": 2.55,
"learning_rate": 7.841525394822117e-06,
"loss": 0.5809,
"step": 31500
},
{
"epoch": 2.59,
"learning_rate": 7.798973291946653e-06,
"loss": 0.5984,
"step": 32000
},
{
"epoch": 2.63,
"learning_rate": 7.756335914315927e-06,
"loss": 0.6308,
"step": 32500
},
{
"epoch": 2.67,
"learning_rate": 7.7136985366852e-06,
"loss": 0.5964,
"step": 33000
},
{
"epoch": 2.71,
"learning_rate": 7.671061159054473e-06,
"loss": 0.5783,
"step": 33500
},
{
"epoch": 2.75,
"learning_rate": 7.62850905617901e-06,
"loss": 0.5914,
"step": 34000
},
{
"epoch": 2.79,
"learning_rate": 7.585871678548283e-06,
"loss": 0.5716,
"step": 34500
},
{
"epoch": 2.84,
"learning_rate": 7.543319575672818e-06,
"loss": 0.613,
"step": 35000
},
{
"epoch": 2.88,
"learning_rate": 7.500682198042092e-06,
"loss": 0.561,
"step": 35500
},
{
"epoch": 2.92,
"learning_rate": 7.458044820411367e-06,
"loss": 0.5731,
"step": 36000
},
{
"epoch": 2.96,
"learning_rate": 7.41540744278064e-06,
"loss": 0.565,
"step": 36500
},
{
"epoch": 3.0,
"learning_rate": 7.372855339905175e-06,
"loss": 0.6118,
"step": 37000
},
{
"epoch": 3.0,
"eval_BLEU": 39.5916929052258,
"eval_BLEU-Bigram-Precision": 44.30874450415242,
"eval_BLEU-Trigram-Precision": 34.66957832891385,
"eval_BLEU-Unigram-Precision": 61.68977749113189,
"eval_ROUGE-2": 33.461963127391705,
"eval_ROUGE-L": 46.65653806735692,
"eval_Sacre-Bigram-Precision": 42.369437106279214,
"eval_Sacre-Trigram-Precision": 34.54220198406245,
"eval_Sacre-Unigram-Precision": 59.68165957650208,
"eval_SacreBLEU": 38.27100461755832,
"eval_loss": 0.7759082317352295,
"eval_runtime": 173.3948,
"eval_samples_per_second": 7.134,
"eval_steps_per_second": 7.134,
"step": 37032
},
{
"epoch": 3.04,
"learning_rate": 7.33030323702971e-06,
"loss": 0.4871,
"step": 37500
},
{
"epoch": 3.08,
"learning_rate": 7.287665859398984e-06,
"loss": 0.4991,
"step": 38000
},
{
"epoch": 3.12,
"learning_rate": 7.245028481768258e-06,
"loss": 0.5436,
"step": 38500
},
{
"epoch": 3.16,
"learning_rate": 7.202391104137532e-06,
"loss": 0.5165,
"step": 39000
},
{
"epoch": 3.2,
"learning_rate": 7.159753726506805e-06,
"loss": 0.516,
"step": 39500
},
{
"epoch": 3.24,
"learning_rate": 7.11711634887608e-06,
"loss": 0.5173,
"step": 40000
},
{
"epoch": 3.28,
"learning_rate": 7.074478971245353e-06,
"loss": 0.5239,
"step": 40500
},
{
"epoch": 3.32,
"learning_rate": 7.031841593614626e-06,
"loss": 0.5112,
"step": 41000
},
{
"epoch": 3.36,
"learning_rate": 6.9892042159839e-06,
"loss": 0.516,
"step": 41500
},
{
"epoch": 3.4,
"learning_rate": 6.9465668383531745e-06,
"loss": 0.5384,
"step": 42000
},
{
"epoch": 3.44,
"learning_rate": 6.903929460722449e-06,
"loss": 0.5194,
"step": 42500
},
{
"epoch": 3.48,
"learning_rate": 6.861292083091722e-06,
"loss": 0.5006,
"step": 43000
},
{
"epoch": 3.52,
"learning_rate": 6.818739980216257e-06,
"loss": 0.5362,
"step": 43500
},
{
"epoch": 3.56,
"learning_rate": 6.776102602585531e-06,
"loss": 0.5505,
"step": 44000
},
{
"epoch": 3.6,
"learning_rate": 6.733465224954805e-06,
"loss": 0.5247,
"step": 44500
},
{
"epoch": 3.65,
"learning_rate": 6.6908278473240785e-06,
"loss": 0.5104,
"step": 45000
},
{
"epoch": 3.69,
"learning_rate": 6.648275744448614e-06,
"loss": 0.516,
"step": 45500
},
{
"epoch": 3.73,
"learning_rate": 6.605638366817888e-06,
"loss": 0.5097,
"step": 46000
},
{
"epoch": 3.77,
"learning_rate": 6.563000989187162e-06,
"loss": 0.5147,
"step": 46500
},
{
"epoch": 3.81,
"learning_rate": 6.520363611556435e-06,
"loss": 0.4988,
"step": 47000
},
{
"epoch": 3.85,
"learning_rate": 6.47781150868097e-06,
"loss": 0.5143,
"step": 47500
},
{
"epoch": 3.89,
"learning_rate": 6.435174131050244e-06,
"loss": 0.5146,
"step": 48000
},
{
"epoch": 3.93,
"learning_rate": 6.3925367534195185e-06,
"loss": 0.4949,
"step": 48500
},
{
"epoch": 3.97,
"learning_rate": 6.349899375788792e-06,
"loss": 0.5327,
"step": 49000
},
{
"epoch": 4.0,
"eval_BLEU": 41.583977153720554,
"eval_BLEU-Bigram-Precision": 47.79210232463709,
"eval_BLEU-Trigram-Precision": 38.44570809639859,
"eval_BLEU-Unigram-Precision": 64.70223028797936,
"eval_ROUGE-2": 35.92233849645701,
"eval_ROUGE-L": 48.89087193511837,
"eval_Sacre-Bigram-Precision": 46.017043349388665,
"eval_Sacre-Trigram-Precision": 38.29859537834164,
"eval_Sacre-Unigram-Precision": 62.97069051167412,
"eval_SacreBLEU": 40.202754965999326,
"eval_loss": 0.734529972076416,
"eval_runtime": 168.3041,
"eval_samples_per_second": 7.35,
"eval_steps_per_second": 7.35,
"step": 49376
},
{
"epoch": 4.01,
"learning_rate": 6.307347272913327e-06,
"loss": 0.515,
"step": 49500
},
{
"epoch": 4.05,
"learning_rate": 6.264709895282601e-06,
"loss": 0.4603,
"step": 50000
},
{
"epoch": 4.09,
"learning_rate": 6.222072517651875e-06,
"loss": 0.4565,
"step": 50500
},
{
"epoch": 4.13,
"learning_rate": 6.179435140021148e-06,
"loss": 0.4557,
"step": 51000
},
{
"epoch": 4.17,
"learning_rate": 6.136797762390423e-06,
"loss": 0.4586,
"step": 51500
},
{
"epoch": 4.21,
"learning_rate": 6.094245659514958e-06,
"loss": 0.4295,
"step": 52000
},
{
"epoch": 4.25,
"learning_rate": 6.051608281884232e-06,
"loss": 0.4774,
"step": 52500
},
{
"epoch": 4.29,
"learning_rate": 6.009056179008766e-06,
"loss": 0.4555,
"step": 53000
},
{
"epoch": 4.33,
"learning_rate": 5.966418801378041e-06,
"loss": 0.4448,
"step": 53500
},
{
"epoch": 4.37,
"learning_rate": 5.923781423747314e-06,
"loss": 0.4481,
"step": 54000
},
{
"epoch": 4.42,
"learning_rate": 5.881144046116588e-06,
"loss": 0.4288,
"step": 54500
},
{
"epoch": 4.46,
"learning_rate": 5.838506668485862e-06,
"loss": 0.4459,
"step": 55000
},
{
"epoch": 4.5,
"learning_rate": 5.7958692908551366e-06,
"loss": 0.4616,
"step": 55500
},
{
"epoch": 4.54,
"learning_rate": 5.75323191322441e-06,
"loss": 0.4775,
"step": 56000
},
{
"epoch": 4.58,
"learning_rate": 5.710594535593683e-06,
"loss": 0.4414,
"step": 56500
},
{
"epoch": 4.62,
"learning_rate": 5.668042432718218e-06,
"loss": 0.483,
"step": 57000
},
{
"epoch": 4.66,
"learning_rate": 5.625405055087493e-06,
"loss": 0.4671,
"step": 57500
},
{
"epoch": 4.7,
"learning_rate": 5.5827676774567664e-06,
"loss": 0.4696,
"step": 58000
},
{
"epoch": 4.74,
"learning_rate": 5.54013029982604e-06,
"loss": 0.4628,
"step": 58500
},
{
"epoch": 4.78,
"learning_rate": 5.497578196950575e-06,
"loss": 0.4591,
"step": 59000
},
{
"epoch": 4.82,
"learning_rate": 5.45494081931985e-06,
"loss": 0.4362,
"step": 59500
},
{
"epoch": 4.86,
"learning_rate": 5.412303441689123e-06,
"loss": 0.4767,
"step": 60000
},
{
"epoch": 4.9,
"learning_rate": 5.369666064058396e-06,
"loss": 0.4642,
"step": 60500
},
{
"epoch": 4.94,
"learning_rate": 5.3271139611829315e-06,
"loss": 0.4348,
"step": 61000
},
{
"epoch": 4.98,
"learning_rate": 5.2844765835522064e-06,
"loss": 0.4365,
"step": 61500
},
{
"epoch": 5.0,
"eval_BLEU": 43.53907049710156,
"eval_BLEU-Bigram-Precision": 49.90573248407643,
"eval_BLEU-Trigram-Precision": 40.825538394605175,
"eval_BLEU-Unigram-Precision": 66.37906241012367,
"eval_ROUGE-2": 38.22782051808487,
"eval_ROUGE-L": 51.36307879612767,
"eval_Sacre-Bigram-Precision": 48.25423818887177,
"eval_Sacre-Trigram-Precision": 40.784982935153586,
"eval_Sacre-Unigram-Precision": 64.79006681958711,
"eval_SacreBLEU": 42.18614400441328,
"eval_loss": 0.7037733197212219,
"eval_runtime": 168.6409,
"eval_samples_per_second": 7.335,
"eval_steps_per_second": 7.335,
"step": 61720
},
{
"epoch": 5.02,
"learning_rate": 5.241924480676741e-06,
"loss": 0.4345,
"step": 62000
},
{
"epoch": 5.06,
"learning_rate": 5.199372377801276e-06,
"loss": 0.4124,
"step": 62500
},
{
"epoch": 5.1,
"learning_rate": 5.156735000170549e-06,
"loss": 0.3992,
"step": 63000
},
{
"epoch": 5.14,
"learning_rate": 5.114097622539824e-06,
"loss": 0.3928,
"step": 63500
},
{
"epoch": 5.18,
"learning_rate": 5.0714602449090974e-06,
"loss": 0.4428,
"step": 64000
},
{
"epoch": 5.23,
"learning_rate": 5.0288228672783715e-06,
"loss": 0.4003,
"step": 64500
},
{
"epoch": 5.27,
"learning_rate": 4.986185489647646e-06,
"loss": 0.4077,
"step": 65000
},
{
"epoch": 5.31,
"learning_rate": 4.943548112016919e-06,
"loss": 0.4076,
"step": 65500
},
{
"epoch": 5.35,
"learning_rate": 4.900910734386193e-06,
"loss": 0.4138,
"step": 66000
},
{
"epoch": 5.39,
"learning_rate": 4.858273356755466e-06,
"loss": 0.3901,
"step": 66500
},
{
"epoch": 5.43,
"learning_rate": 4.81563597912474e-06,
"loss": 0.4412,
"step": 67000
},
{
"epoch": 5.47,
"learning_rate": 4.7729986014940135e-06,
"loss": 0.4128,
"step": 67500
},
{
"epoch": 5.51,
"learning_rate": 4.730361223863288e-06,
"loss": 0.4382,
"step": 68000
},
{
"epoch": 5.55,
"learning_rate": 4.687809120987823e-06,
"loss": 0.3855,
"step": 68500
},
{
"epoch": 5.59,
"learning_rate": 4.645171743357097e-06,
"loss": 0.4116,
"step": 69000
},
{
"epoch": 5.63,
"learning_rate": 4.60253436572637e-06,
"loss": 0.3977,
"step": 69500
},
{
"epoch": 5.67,
"learning_rate": 4.559896988095644e-06,
"loss": 0.4232,
"step": 70000
},
{
"epoch": 5.71,
"learning_rate": 4.517259610464918e-06,
"loss": 0.4194,
"step": 70500
},
{
"epoch": 5.75,
"learning_rate": 4.4746222328341924e-06,
"loss": 0.389,
"step": 71000
},
{
"epoch": 5.79,
"learning_rate": 4.4319848552034665e-06,
"loss": 0.4051,
"step": 71500
},
{
"epoch": 5.83,
"learning_rate": 4.38934747757274e-06,
"loss": 0.4337,
"step": 72000
},
{
"epoch": 5.87,
"learning_rate": 4.346795374697275e-06,
"loss": 0.4143,
"step": 72500
},
{
"epoch": 5.91,
"learning_rate": 4.304157997066549e-06,
"loss": 0.4146,
"step": 73000
},
{
"epoch": 5.95,
"learning_rate": 4.261520619435823e-06,
"loss": 0.3878,
"step": 73500
},
{
"epoch": 5.99,
"learning_rate": 4.218883241805096e-06,
"loss": 0.3955,
"step": 74000
},
{
"epoch": 6.0,
"eval_BLEU": 46.18598569567297,
"eval_BLEU-Bigram-Precision": 51.835734293717486,
"eval_BLEU-Trigram-Precision": 42.84724073580379,
"eval_BLEU-Unigram-Precision": 67.89297658862876,
"eval_ROUGE-2": 40.77288879803008,
"eval_ROUGE-L": 53.47443573476642,
"eval_Sacre-Bigram-Precision": 50.16359387172163,
"eval_Sacre-Trigram-Precision": 42.76279276279276,
"eval_Sacre-Unigram-Precision": 66.28928362287722,
"eval_SacreBLEU": 44.9656942267261,
"eval_loss": 0.6645232439041138,
"eval_runtime": 169.8377,
"eval_samples_per_second": 7.283,
"eval_steps_per_second": 7.283,
"step": 74064
},
{
"epoch": 6.04,
"learning_rate": 4.176331138929632e-06,
"loss": 0.3766,
"step": 74500
},
{
"epoch": 6.08,
"learning_rate": 4.133693761298906e-06,
"loss": 0.3556,
"step": 75000
},
{
"epoch": 6.12,
"learning_rate": 4.09105638366818e-06,
"loss": 0.3511,
"step": 75500
},
{
"epoch": 6.16,
"learning_rate": 4.048504280792714e-06,
"loss": 0.4021,
"step": 76000
},
{
"epoch": 6.2,
"learning_rate": 4.005952177917249e-06,
"loss": 0.3931,
"step": 76500
},
{
"epoch": 6.24,
"learning_rate": 3.9633148002865234e-06,
"loss": 0.3704,
"step": 77000
},
{
"epoch": 6.28,
"learning_rate": 3.9206774226557975e-06,
"loss": 0.3515,
"step": 77500
},
{
"epoch": 6.32,
"learning_rate": 3.878040045025071e-06,
"loss": 0.3907,
"step": 78000
},
{
"epoch": 6.36,
"learning_rate": 3.835402667394345e-06,
"loss": 0.3643,
"step": 78500
},
{
"epoch": 6.4,
"learning_rate": 3.7927652897636185e-06,
"loss": 0.3708,
"step": 79000
},
{
"epoch": 6.44,
"learning_rate": 3.7501279121328926e-06,
"loss": 0.3697,
"step": 79500
},
{
"epoch": 6.48,
"learning_rate": 3.7074905345021663e-06,
"loss": 0.3486,
"step": 80000
},
{
"epoch": 6.52,
"learning_rate": 3.6648531568714404e-06,
"loss": 0.3552,
"step": 80500
},
{
"epoch": 6.56,
"learning_rate": 3.6222157792407136e-06,
"loss": 0.3812,
"step": 81000
},
{
"epoch": 6.6,
"learning_rate": 3.5795784016099877e-06,
"loss": 0.3622,
"step": 81500
},
{
"epoch": 6.64,
"learning_rate": 3.5369410239792614e-06,
"loss": 0.391,
"step": 82000
},
{
"epoch": 6.68,
"learning_rate": 3.494388921103797e-06,
"loss": 0.4055,
"step": 82500
},
{
"epoch": 6.72,
"learning_rate": 3.4517515434730702e-06,
"loss": 0.3967,
"step": 83000
},
{
"epoch": 6.76,
"learning_rate": 3.4091141658423443e-06,
"loss": 0.3949,
"step": 83500
},
{
"epoch": 6.8,
"learning_rate": 3.3664767882116184e-06,
"loss": 0.3898,
"step": 84000
},
{
"epoch": 6.85,
"learning_rate": 3.3239246853361536e-06,
"loss": 0.3997,
"step": 84500
},
{
"epoch": 6.89,
"learning_rate": 3.281287307705427e-06,
"loss": 0.4075,
"step": 85000
},
{
"epoch": 6.93,
"learning_rate": 3.238649930074701e-06,
"loss": 0.3704,
"step": 85500
},
{
"epoch": 6.97,
"learning_rate": 3.196012552443975e-06,
"loss": 0.3809,
"step": 86000
},
{
"epoch": 7.0,
"eval_BLEU": 48.53041153339425,
"eval_BLEU-Bigram-Precision": 52.751822317890515,
"eval_BLEU-Trigram-Precision": 44.54280358258696,
"eval_BLEU-Unigram-Precision": 67.82913552910786,
"eval_ROUGE-2": 42.93136082972705,
"eval_ROUGE-L": 54.935588774300534,
"eval_Sacre-Bigram-Precision": 51.321117635148816,
"eval_Sacre-Trigram-Precision": 44.494843134078515,
"eval_Sacre-Unigram-Precision": 66.2363645024532,
"eval_SacreBLEU": 47.488510994186036,
"eval_loss": 0.6420064568519592,
"eval_runtime": 175.4955,
"eval_samples_per_second": 7.049,
"eval_steps_per_second": 7.049,
"step": 86408
},
{
"epoch": 7.01,
"learning_rate": 3.1534604495685103e-06,
"loss": 0.3682,
"step": 86500
},
{
"epoch": 7.05,
"learning_rate": 3.110908346693045e-06,
"loss": 0.3759,
"step": 87000
},
{
"epoch": 7.09,
"learning_rate": 3.068270969062319e-06,
"loss": 0.3607,
"step": 87500
},
{
"epoch": 7.13,
"learning_rate": 3.025633591431593e-06,
"loss": 0.3478,
"step": 88000
},
{
"epoch": 7.17,
"learning_rate": 2.982996213800867e-06,
"loss": 0.3474,
"step": 88500
},
{
"epoch": 7.21,
"learning_rate": 2.94035883617014e-06,
"loss": 0.3824,
"step": 89000
},
{
"epoch": 7.25,
"learning_rate": 2.8977214585394142e-06,
"loss": 0.3469,
"step": 89500
},
{
"epoch": 7.29,
"learning_rate": 2.8550840809086883e-06,
"loss": 0.3599,
"step": 90000
},
{
"epoch": 7.33,
"learning_rate": 2.8124467032779616e-06,
"loss": 0.3332,
"step": 90500
},
{
"epoch": 7.37,
"learning_rate": 2.7698093256472356e-06,
"loss": 0.3508,
"step": 91000
},
{
"epoch": 7.41,
"learning_rate": 2.727257222771771e-06,
"loss": 0.3382,
"step": 91500
},
{
"epoch": 7.45,
"learning_rate": 2.684619845141045e-06,
"loss": 0.3583,
"step": 92000
},
{
"epoch": 7.49,
"learning_rate": 2.641982467510318e-06,
"loss": 0.3392,
"step": 92500
},
{
"epoch": 7.53,
"learning_rate": 2.5993450898795923e-06,
"loss": 0.3548,
"step": 93000
},
{
"epoch": 7.57,
"learning_rate": 2.556707712248866e-06,
"loss": 0.3372,
"step": 93500
},
{
"epoch": 7.62,
"learning_rate": 2.5141556093734016e-06,
"loss": 0.3386,
"step": 94000
},
{
"epoch": 7.66,
"learning_rate": 2.471518231742675e-06,
"loss": 0.362,
"step": 94500
},
{
"epoch": 7.7,
"learning_rate": 2.428880854111949e-06,
"loss": 0.3454,
"step": 95000
},
{
"epoch": 7.74,
"learning_rate": 2.3862434764812226e-06,
"loss": 0.3757,
"step": 95500
},
{
"epoch": 7.78,
"learning_rate": 2.3436913736057578e-06,
"loss": 0.3785,
"step": 96000
},
{
"epoch": 7.82,
"learning_rate": 2.3010539959750314e-06,
"loss": 0.3656,
"step": 96500
},
{
"epoch": 7.86,
"learning_rate": 2.2584166183443055e-06,
"loss": 0.36,
"step": 97000
},
{
"epoch": 7.9,
"learning_rate": 2.2157792407135796e-06,
"loss": 0.3522,
"step": 97500
},
{
"epoch": 7.94,
"learning_rate": 2.1732271378381144e-06,
"loss": 0.3511,
"step": 98000
},
{
"epoch": 7.98,
"learning_rate": 2.130589760207388e-06,
"loss": 0.3349,
"step": 98500
},
{
"epoch": 8.0,
"eval_BLEU": 49.57199736076417,
"eval_BLEU-Bigram-Precision": 55.58697298661954,
"eval_BLEU-Trigram-Precision": 47.143087942269375,
"eval_BLEU-Unigram-Precision": 70.68244463454045,
"eval_ROUGE-2": 44.4865922111988,
"eval_ROUGE-L": 56.71133192128113,
"eval_Sacre-Bigram-Precision": 53.98119122257053,
"eval_Sacre-Trigram-Precision": 47.064737753449144,
"eval_Sacre-Unigram-Precision": 69.11714187564411,
"eval_SacreBLEU": 48.47735653922784,
"eval_loss": 0.6274014711380005,
"eval_runtime": 169.1165,
"eval_samples_per_second": 7.314,
"eval_steps_per_second": 7.314,
"step": 98752
},
{
"epoch": 8.02,
"learning_rate": 2.087952382576662e-06,
"loss": 0.3635,
"step": 99000
},
{
"epoch": 8.06,
"learning_rate": 2.0453150049459363e-06,
"loss": 0.3149,
"step": 99500
},
{
"epoch": 8.1,
"learning_rate": 2.00267762731521e-06,
"loss": 0.3348,
"step": 100000
},
{
"epoch": 8.14,
"learning_rate": 1.9600402496844836e-06,
"loss": 0.3358,
"step": 100500
},
{
"epoch": 8.18,
"learning_rate": 1.9174028720537573e-06,
"loss": 0.3405,
"step": 101000
},
{
"epoch": 8.22,
"learning_rate": 1.8747654944230311e-06,
"loss": 0.328,
"step": 101500
},
{
"epoch": 8.26,
"learning_rate": 1.8322133915475666e-06,
"loss": 0.3487,
"step": 102000
},
{
"epoch": 8.3,
"learning_rate": 1.7895760139168402e-06,
"loss": 0.3012,
"step": 102500
},
{
"epoch": 8.34,
"learning_rate": 1.746938636286114e-06,
"loss": 0.3456,
"step": 103000
},
{
"epoch": 8.38,
"learning_rate": 1.7043012586553878e-06,
"loss": 0.3472,
"step": 103500
},
{
"epoch": 8.43,
"learning_rate": 1.6617491557799232e-06,
"loss": 0.3476,
"step": 104000
},
{
"epoch": 8.47,
"learning_rate": 1.6191117781491968e-06,
"loss": 0.3262,
"step": 104500
},
{
"epoch": 8.51,
"learning_rate": 1.5765596752737323e-06,
"loss": 0.3607,
"step": 105000
},
{
"epoch": 8.55,
"learning_rate": 1.533922297643006e-06,
"loss": 0.341,
"step": 105500
},
{
"epoch": 8.59,
"learning_rate": 1.4912849200122798e-06,
"loss": 0.3164,
"step": 106000
},
{
"epoch": 8.63,
"learning_rate": 1.4486475423815535e-06,
"loss": 0.354,
"step": 106500
},
{
"epoch": 8.67,
"learning_rate": 1.4060101647508274e-06,
"loss": 0.3282,
"step": 107000
},
{
"epoch": 8.71,
"learning_rate": 1.363372787120101e-06,
"loss": 0.3419,
"step": 107500
},
{
"epoch": 8.75,
"learning_rate": 1.320735409489375e-06,
"loss": 0.3524,
"step": 108000
},
{
"epoch": 8.79,
"learning_rate": 1.2780980318586486e-06,
"loss": 0.3372,
"step": 108500
},
{
"epoch": 8.83,
"learning_rate": 1.2354606542279224e-06,
"loss": 0.358,
"step": 109000
},
{
"epoch": 8.87,
"learning_rate": 1.1929085513524577e-06,
"loss": 0.3426,
"step": 109500
},
{
"epoch": 8.91,
"learning_rate": 1.150356448476993e-06,
"loss": 0.3449,
"step": 110000
},
{
"epoch": 8.95,
"learning_rate": 1.1077190708462667e-06,
"loss": 0.3547,
"step": 110500
},
{
"epoch": 8.99,
"learning_rate": 1.0650816932155406e-06,
"loss": 0.347,
"step": 111000
},
{
"epoch": 9.0,
"eval_BLEU": 50.42765483523289,
"eval_BLEU-Bigram-Precision": 55.31231468669697,
"eval_BLEU-Trigram-Precision": 47.11826938259908,
"eval_BLEU-Unigram-Precision": 69.82722488706749,
"eval_ROUGE-2": 44.880113409125535,
"eval_ROUGE-L": 56.82064108206701,
"eval_Sacre-Bigram-Precision": 53.78361795684631,
"eval_Sacre-Trigram-Precision": 47.04437530702472,
"eval_Sacre-Unigram-Precision": 68.36258716037509,
"eval_SacreBLEU": 49.36901330124516,
"eval_loss": 0.6147586703300476,
"eval_runtime": 169.9129,
"eval_samples_per_second": 7.28,
"eval_steps_per_second": 7.28,
"step": 111096
},
{
"epoch": 9.03,
"learning_rate": 1.0224443155848143e-06,
"loss": 0.3398,
"step": 111500
},
{
"epoch": 9.07,
"learning_rate": 9.798069379540882e-07,
"loss": 0.3255,
"step": 112000
},
{
"epoch": 9.11,
"learning_rate": 9.37169560323362e-07,
"loss": 0.3178,
"step": 112500
},
{
"epoch": 9.15,
"learning_rate": 8.946174574478971e-07,
"loss": 0.3281,
"step": 113000
},
{
"epoch": 9.19,
"learning_rate": 8.520653545724324e-07,
"loss": 0.3261,
"step": 113500
},
{
"epoch": 9.24,
"learning_rate": 8.094279769417062e-07,
"loss": 0.3275,
"step": 114000
},
{
"epoch": 9.28,
"learning_rate": 7.6679059931098e-07,
"loss": 0.3359,
"step": 114500
},
{
"epoch": 9.32,
"learning_rate": 7.241532216802538e-07,
"loss": 0.3165,
"step": 115000
},
{
"epoch": 9.36,
"learning_rate": 6.815158440495275e-07,
"loss": 0.3199,
"step": 115500
},
{
"epoch": 9.4,
"learning_rate": 6.388784664188015e-07,
"loss": 0.332,
"step": 116000
},
{
"epoch": 9.44,
"learning_rate": 5.962410887880752e-07,
"loss": 0.3227,
"step": 116500
},
{
"epoch": 9.48,
"learning_rate": 5.536037111573491e-07,
"loss": 0.3203,
"step": 117000
},
{
"epoch": 9.52,
"learning_rate": 5.109663335266228e-07,
"loss": 0.347,
"step": 117500
},
{
"epoch": 9.56,
"learning_rate": 4.6832895589589667e-07,
"loss": 0.3485,
"step": 118000
},
{
"epoch": 9.6,
"learning_rate": 4.2569157826517044e-07,
"loss": 0.3244,
"step": 118500
},
{
"epoch": 9.64,
"learning_rate": 3.830542006344442e-07,
"loss": 0.3351,
"step": 119000
},
{
"epoch": 9.68,
"learning_rate": 3.4041682300371804e-07,
"loss": 0.3341,
"step": 119500
},
{
"epoch": 9.72,
"learning_rate": 2.9786472012825325e-07,
"loss": 0.328,
"step": 120000
},
{
"epoch": 9.76,
"learning_rate": 2.5522734249752707e-07,
"loss": 0.3447,
"step": 120500
},
{
"epoch": 9.8,
"learning_rate": 2.1258996486680084e-07,
"loss": 0.3326,
"step": 121000
},
{
"epoch": 9.84,
"learning_rate": 1.6995258723607467e-07,
"loss": 0.3259,
"step": 121500
},
{
"epoch": 9.88,
"learning_rate": 1.274004843606099e-07,
"loss": 0.3355,
"step": 122000
},
{
"epoch": 9.92,
"learning_rate": 8.47631067298837e-08,
"loss": 0.3653,
"step": 122500
},
{
"epoch": 9.96,
"learning_rate": 4.212572909915749e-08,
"loss": 0.3359,
"step": 123000
},
{
"epoch": 10.0,
"eval_BLEU": 50.8713809636162,
"eval_BLEU-Bigram-Precision": 55.25529935869192,
"eval_BLEU-Trigram-Precision": 47.15997915581032,
"eval_BLEU-Unigram-Precision": 69.56702363367799,
"eval_ROUGE-2": 45.59251780348448,
"eval_ROUGE-L": 57.14390377702154,
"eval_Sacre-Bigram-Precision": 53.736780853109344,
"eval_Sacre-Trigram-Precision": 47.052790672568285,
"eval_Sacre-Unigram-Precision": 68.06666666666666,
"eval_SacreBLEU": 49.82287256259556,
"eval_loss": 0.6102247834205627,
"eval_runtime": 172.6687,
"eval_samples_per_second": 7.164,
"eval_steps_per_second": 7.164,
"step": 123440
},
{
"epoch": 10.0,
"step": 123440,
"total_flos": 3771355808563200.0,
"train_loss": 0.4878676912032082,
"train_runtime": 11854.8374,
"train_samples_per_second": 20.824,
"train_steps_per_second": 10.413
}
],
"max_steps": 123440,
"num_train_epochs": 10,
"total_flos": 3771355808563200.0,
"trial_name": null,
"trial_params": null
}