|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 91107, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.983535842470941e-05, |
|
"loss": 4.3991, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9670716849418816e-05, |
|
"loss": 1.3497, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9506075274128224e-05, |
|
"loss": 1.0917, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.934143369883763e-05, |
|
"loss": 0.8889, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_exact_match": 18.4, |
|
"eval_f1": 18.8036, |
|
"eval_gen_len": 5.368, |
|
"eval_loss": 0.6919174194335938, |
|
"eval_rouge1": 18.1786, |
|
"eval_rouge2": 8.6387, |
|
"eval_rougeL": 18.1534, |
|
"eval_rougeLsum": 18.0938, |
|
"eval_runtime": 75.0695, |
|
"eval_samples_per_second": 13.321, |
|
"eval_steps_per_second": 4.449, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.917679212354704e-05, |
|
"loss": 0.8433, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9012150548256447e-05, |
|
"loss": 0.8052, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8847508972965855e-05, |
|
"loss": 0.8465, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8682867397675263e-05, |
|
"loss": 0.7535, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_exact_match": 13.0, |
|
"eval_f1": 21.7903, |
|
"eval_gen_len": 5.483, |
|
"eval_loss": 0.5836785435676575, |
|
"eval_rouge1": 21.3055, |
|
"eval_rouge2": 9.2894, |
|
"eval_rougeL": 21.2178, |
|
"eval_rougeLsum": 21.3015, |
|
"eval_runtime": 76.4558, |
|
"eval_samples_per_second": 13.079, |
|
"eval_steps_per_second": 4.369, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.851822582238467e-05, |
|
"loss": 0.8144, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8353584247094075e-05, |
|
"loss": 0.7393, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8188942671803483e-05, |
|
"loss": 0.7273, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.802430109651289e-05, |
|
"loss": 0.6731, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_exact_match": 11.9, |
|
"eval_f1": 21.595, |
|
"eval_gen_len": 5.467, |
|
"eval_loss": 0.5862796306610107, |
|
"eval_rouge1": 21.739, |
|
"eval_rouge2": 9.5334, |
|
"eval_rougeL": 21.7018, |
|
"eval_rougeLsum": 21.6884, |
|
"eval_runtime": 74.4712, |
|
"eval_samples_per_second": 13.428, |
|
"eval_steps_per_second": 4.485, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7859659521222298e-05, |
|
"loss": 0.6948, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.769501794593171e-05, |
|
"loss": 0.6698, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7530376370641117e-05, |
|
"loss": 0.6466, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7365734795350525e-05, |
|
"loss": 0.6879, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_exact_match": 6.1, |
|
"eval_f1": 22.3924, |
|
"eval_gen_len": 5.95, |
|
"eval_loss": 0.5380619168281555, |
|
"eval_rouge1": 25.3106, |
|
"eval_rouge2": 11.8582, |
|
"eval_rougeL": 25.2639, |
|
"eval_rougeLsum": 25.2861, |
|
"eval_runtime": 76.6994, |
|
"eval_samples_per_second": 13.038, |
|
"eval_steps_per_second": 4.355, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.720109322005993e-05, |
|
"loss": 0.6504, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.7036451644769337e-05, |
|
"loss": 0.6834, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.6871810069478745e-05, |
|
"loss": 0.6759, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6707168494188152e-05, |
|
"loss": 0.6459, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_exact_match": 8.1, |
|
"eval_f1": 22.3008, |
|
"eval_gen_len": 5.583, |
|
"eval_loss": 0.536933958530426, |
|
"eval_rouge1": 25.1248, |
|
"eval_rouge2": 11.4572, |
|
"eval_rougeL": 25.13, |
|
"eval_rougeLsum": 25.1636, |
|
"eval_runtime": 74.8336, |
|
"eval_samples_per_second": 13.363, |
|
"eval_steps_per_second": 4.463, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.654252691889756e-05, |
|
"loss": 0.6555, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.6377885343606968e-05, |
|
"loss": 0.622, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6213243768316376e-05, |
|
"loss": 0.6434, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6048602193025783e-05, |
|
"loss": 0.5865, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_exact_match": 7.8, |
|
"eval_f1": 22.5664, |
|
"eval_gen_len": 5.502, |
|
"eval_loss": 0.5340938568115234, |
|
"eval_rouge1": 25.6756, |
|
"eval_rouge2": 11.0984, |
|
"eval_rougeL": 25.6163, |
|
"eval_rougeLsum": 25.6348, |
|
"eval_runtime": 74.1837, |
|
"eval_samples_per_second": 13.48, |
|
"eval_steps_per_second": 4.502, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.588396061773519e-05, |
|
"loss": 0.573, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.57193190424446e-05, |
|
"loss": 0.6825, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5554677467154007e-05, |
|
"loss": 0.5871, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5390035891863414e-05, |
|
"loss": 0.6442, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 22.9873, |
|
"eval_gen_len": 5.694, |
|
"eval_loss": 0.5320589542388916, |
|
"eval_rouge1": 25.6004, |
|
"eval_rouge2": 11.31, |
|
"eval_rougeL": 25.6195, |
|
"eval_rougeLsum": 25.6027, |
|
"eval_runtime": 75.839, |
|
"eval_samples_per_second": 13.186, |
|
"eval_steps_per_second": 4.404, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5225394316572822e-05, |
|
"loss": 0.6252, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5060752741282227e-05, |
|
"loss": 0.613, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4896111165991638e-05, |
|
"loss": 0.5827, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4731469590701045e-05, |
|
"loss": 0.5819, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_exact_match": 6.6, |
|
"eval_f1": 23.22, |
|
"eval_gen_len": 5.789, |
|
"eval_loss": 0.5230505466461182, |
|
"eval_rouge1": 25.3937, |
|
"eval_rouge2": 11.1344, |
|
"eval_rougeL": 25.3304, |
|
"eval_rougeLsum": 25.3461, |
|
"eval_runtime": 75.9506, |
|
"eval_samples_per_second": 13.166, |
|
"eval_steps_per_second": 4.398, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.4566828015410453e-05, |
|
"loss": 0.6175, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.440218644011986e-05, |
|
"loss": 0.6132, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.423754486482927e-05, |
|
"loss": 0.6048, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.4072903289538673e-05, |
|
"loss": 0.5908, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_exact_match": 7.3, |
|
"eval_f1": 23.0102, |
|
"eval_gen_len": 5.769, |
|
"eval_loss": 0.5285995006561279, |
|
"eval_rouge1": 25.898, |
|
"eval_rouge2": 11.687, |
|
"eval_rougeL": 25.8492, |
|
"eval_rougeLsum": 25.8886, |
|
"eval_runtime": 76.3215, |
|
"eval_samples_per_second": 13.102, |
|
"eval_steps_per_second": 4.376, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.390826171424808e-05, |
|
"loss": 0.5972, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.374362013895749e-05, |
|
"loss": 0.5995, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3578978563666896e-05, |
|
"loss": 0.6133, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3414336988376307e-05, |
|
"loss": 0.5814, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_exact_match": 6.8, |
|
"eval_f1": 23.0215, |
|
"eval_gen_len": 5.956, |
|
"eval_loss": 0.5266488790512085, |
|
"eval_rouge1": 25.7545, |
|
"eval_rouge2": 11.6372, |
|
"eval_rougeL": 25.7025, |
|
"eval_rougeLsum": 25.6994, |
|
"eval_runtime": 77.5697, |
|
"eval_samples_per_second": 12.892, |
|
"eval_steps_per_second": 4.306, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.3249695413085715e-05, |
|
"loss": 0.5501, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3085053837795123e-05, |
|
"loss": 0.5771, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.2920412262504527e-05, |
|
"loss": 0.5856, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.2755770687213935e-05, |
|
"loss": 0.5969, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_exact_match": 6.6, |
|
"eval_f1": 22.3945, |
|
"eval_gen_len": 5.784, |
|
"eval_loss": 0.5150488018989563, |
|
"eval_rouge1": 26.376, |
|
"eval_rouge2": 11.4531, |
|
"eval_rougeL": 26.3401, |
|
"eval_rougeLsum": 26.3444, |
|
"eval_runtime": 76.9076, |
|
"eval_samples_per_second": 13.003, |
|
"eval_steps_per_second": 4.343, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2591129111923343e-05, |
|
"loss": 0.6051, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.242648753663275e-05, |
|
"loss": 0.6252, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.226184596134216e-05, |
|
"loss": 0.6002, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2097204386051566e-05, |
|
"loss": 0.6071, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_exact_match": 7.5, |
|
"eval_f1": 22.429, |
|
"eval_gen_len": 5.577, |
|
"eval_loss": 0.5234242081642151, |
|
"eval_rouge1": 26.0675, |
|
"eval_rouge2": 11.2086, |
|
"eval_rougeL": 26.062, |
|
"eval_rougeLsum": 26.0623, |
|
"eval_runtime": 74.7901, |
|
"eval_samples_per_second": 13.371, |
|
"eval_steps_per_second": 4.466, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.1932562810760974e-05, |
|
"loss": 0.5639, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.176792123547038e-05, |
|
"loss": 0.5438, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.160327966017979e-05, |
|
"loss": 0.5473, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.1438638084889197e-05, |
|
"loss": 0.5743, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_exact_match": 6.1, |
|
"eval_f1": 22.4076, |
|
"eval_gen_len": 5.714, |
|
"eval_loss": 0.5406718850135803, |
|
"eval_rouge1": 27.3834, |
|
"eval_rouge2": 11.9993, |
|
"eval_rougeL": 27.3736, |
|
"eval_rougeLsum": 27.3494, |
|
"eval_runtime": 75.5929, |
|
"eval_samples_per_second": 13.229, |
|
"eval_steps_per_second": 4.418, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.1273996509598605e-05, |
|
"loss": 0.6123, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.1109354934308013e-05, |
|
"loss": 0.5416, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.094471335901742e-05, |
|
"loss": 0.5794, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.0780071783726825e-05, |
|
"loss": 0.5513, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_exact_match": 5.6, |
|
"eval_f1": 23.3636, |
|
"eval_gen_len": 5.841, |
|
"eval_loss": 0.516300618648529, |
|
"eval_rouge1": 27.5598, |
|
"eval_rouge2": 12.4259, |
|
"eval_rougeL": 27.494, |
|
"eval_rougeLsum": 27.5139, |
|
"eval_runtime": 75.7363, |
|
"eval_samples_per_second": 13.204, |
|
"eval_steps_per_second": 4.41, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.0615430208436232e-05, |
|
"loss": 0.5198, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.0450788633145644e-05, |
|
"loss": 0.5419, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.028614705785505e-05, |
|
"loss": 0.594, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.012150548256446e-05, |
|
"loss": 0.5916, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_exact_match": 6.5, |
|
"eval_f1": 23.2412, |
|
"eval_gen_len": 5.805, |
|
"eval_loss": 0.524455726146698, |
|
"eval_rouge1": 26.6244, |
|
"eval_rouge2": 11.8352, |
|
"eval_rougeL": 26.5432, |
|
"eval_rougeLsum": 26.5964, |
|
"eval_runtime": 76.4476, |
|
"eval_samples_per_second": 13.081, |
|
"eval_steps_per_second": 4.369, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.9956863907273867e-05, |
|
"loss": 0.5885, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.979222233198327e-05, |
|
"loss": 0.5536, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.962758075669268e-05, |
|
"loss": 0.5119, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.9462939181402087e-05, |
|
"loss": 0.5046, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_exact_match": 8.6, |
|
"eval_f1": 22.4289, |
|
"eval_gen_len": 5.578, |
|
"eval_loss": 0.5430198907852173, |
|
"eval_rouge1": 26.57, |
|
"eval_rouge2": 11.7677, |
|
"eval_rougeL": 26.6056, |
|
"eval_rougeLsum": 26.5575, |
|
"eval_runtime": 74.902, |
|
"eval_samples_per_second": 13.351, |
|
"eval_steps_per_second": 4.459, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9298297606111494e-05, |
|
"loss": 0.5077, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.9133656030820902e-05, |
|
"loss": 0.5206, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.8969014455530313e-05, |
|
"loss": 0.5079, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.880437288023972e-05, |
|
"loss": 0.5349, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_exact_match": 5.4, |
|
"eval_f1": 23.2951, |
|
"eval_gen_len": 5.744, |
|
"eval_loss": 0.517284631729126, |
|
"eval_rouge1": 28.0046, |
|
"eval_rouge2": 12.2802, |
|
"eval_rougeL": 28.0025, |
|
"eval_rougeLsum": 27.9706, |
|
"eval_runtime": 75.9315, |
|
"eval_samples_per_second": 13.17, |
|
"eval_steps_per_second": 4.399, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.8639731304949125e-05, |
|
"loss": 0.4871, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.8475089729658533e-05, |
|
"loss": 0.541, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.831044815436794e-05, |
|
"loss": 0.5659, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.814580657907735e-05, |
|
"loss": 0.5231, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_exact_match": 6.1, |
|
"eval_f1": 23.2074, |
|
"eval_gen_len": 5.814, |
|
"eval_loss": 0.5330841541290283, |
|
"eval_rouge1": 27.4091, |
|
"eval_rouge2": 12.3926, |
|
"eval_rougeL": 27.3466, |
|
"eval_rougeLsum": 27.3708, |
|
"eval_runtime": 77.6684, |
|
"eval_samples_per_second": 12.875, |
|
"eval_steps_per_second": 4.3, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.7981165003786756e-05, |
|
"loss": 0.5209, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.7816523428496164e-05, |
|
"loss": 0.5561, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.7651881853205572e-05, |
|
"loss": 0.5269, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.748724027791498e-05, |
|
"loss": 0.5144, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_exact_match": 6.6, |
|
"eval_f1": 22.9561, |
|
"eval_gen_len": 5.764, |
|
"eval_loss": 0.533650815486908, |
|
"eval_rouge1": 27.7074, |
|
"eval_rouge2": 12.5122, |
|
"eval_rougeL": 27.6729, |
|
"eval_rougeLsum": 27.6909, |
|
"eval_runtime": 76.8102, |
|
"eval_samples_per_second": 13.019, |
|
"eval_steps_per_second": 4.348, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.7322598702624387e-05, |
|
"loss": 0.4877, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.7157957127333795e-05, |
|
"loss": 0.5182, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.6993315552043203e-05, |
|
"loss": 0.4982, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.682867397675261e-05, |
|
"loss": 0.5576, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_exact_match": 7.0, |
|
"eval_f1": 22.646, |
|
"eval_gen_len": 5.801, |
|
"eval_loss": 0.5152503848075867, |
|
"eval_rouge1": 27.8201, |
|
"eval_rouge2": 12.335, |
|
"eval_rougeL": 27.806, |
|
"eval_rougeLsum": 27.7887, |
|
"eval_runtime": 77.4695, |
|
"eval_samples_per_second": 12.908, |
|
"eval_steps_per_second": 4.311, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.666403240146202e-05, |
|
"loss": 0.5032, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.6499390826171423e-05, |
|
"loss": 0.5397, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.633474925088083e-05, |
|
"loss": 0.4944, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.617010767559024e-05, |
|
"loss": 0.5184, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_exact_match": 4.8, |
|
"eval_f1": 23.3492, |
|
"eval_gen_len": 5.988, |
|
"eval_loss": 0.5299638509750366, |
|
"eval_rouge1": 28.1638, |
|
"eval_rouge2": 12.5269, |
|
"eval_rougeL": 28.1245, |
|
"eval_rougeLsum": 28.1199, |
|
"eval_runtime": 77.8204, |
|
"eval_samples_per_second": 12.85, |
|
"eval_steps_per_second": 4.292, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.600546610029965e-05, |
|
"loss": 0.488, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.5840824525009057e-05, |
|
"loss": 0.5176, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5676182949718465e-05, |
|
"loss": 0.4941, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.5511541374427873e-05, |
|
"loss": 0.5321, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_exact_match": 5.2, |
|
"eval_f1": 23.1651, |
|
"eval_gen_len": 5.997, |
|
"eval_loss": 0.5239735245704651, |
|
"eval_rouge1": 27.9328, |
|
"eval_rouge2": 12.5264, |
|
"eval_rougeL": 27.9238, |
|
"eval_rougeLsum": 27.9206, |
|
"eval_runtime": 78.5078, |
|
"eval_samples_per_second": 12.738, |
|
"eval_steps_per_second": 4.254, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.5346899799137277e-05, |
|
"loss": 0.5, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.5182258223846685e-05, |
|
"loss": 0.5279, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.5017616648556093e-05, |
|
"loss": 0.5223, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.4852975073265502e-05, |
|
"loss": 0.478, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_exact_match": 6.4, |
|
"eval_f1": 22.7142, |
|
"eval_gen_len": 5.824, |
|
"eval_loss": 0.5349439382553101, |
|
"eval_rouge1": 27.9589, |
|
"eval_rouge2": 12.5606, |
|
"eval_rougeL": 27.902, |
|
"eval_rougeLsum": 27.8978, |
|
"eval_runtime": 76.8435, |
|
"eval_samples_per_second": 13.013, |
|
"eval_steps_per_second": 4.346, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.4688333497974908e-05, |
|
"loss": 0.482, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.4523691922684316e-05, |
|
"loss": 0.5004, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.4359050347393724e-05, |
|
"loss": 0.5482, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.4194408772103133e-05, |
|
"loss": 0.497, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_exact_match": 5.5, |
|
"eval_f1": 22.6861, |
|
"eval_gen_len": 5.864, |
|
"eval_loss": 0.5399107933044434, |
|
"eval_rouge1": 28.2327, |
|
"eval_rouge2": 12.5728, |
|
"eval_rougeL": 28.1968, |
|
"eval_rougeLsum": 28.21, |
|
"eval_runtime": 77.9, |
|
"eval_samples_per_second": 12.837, |
|
"eval_steps_per_second": 4.288, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.4029767196812539e-05, |
|
"loss": 0.4476, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.3865125621521947e-05, |
|
"loss": 0.5754, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.3700484046231355e-05, |
|
"loss": 0.5296, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.3535842470940762e-05, |
|
"loss": 0.5475, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 22.2882, |
|
"eval_gen_len": 5.651, |
|
"eval_loss": 0.5568350553512573, |
|
"eval_rouge1": 27.6, |
|
"eval_rouge2": 12.259, |
|
"eval_rougeL": 27.6081, |
|
"eval_rougeLsum": 27.561, |
|
"eval_runtime": 76.4106, |
|
"eval_samples_per_second": 13.087, |
|
"eval_steps_per_second": 4.371, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.337120089565017e-05, |
|
"loss": 0.492, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3206559320359578e-05, |
|
"loss": 0.5221, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3041917745068984e-05, |
|
"loss": 0.5491, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.2877276169778392e-05, |
|
"loss": 0.4643, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_exact_match": 8.9, |
|
"eval_f1": 21.3228, |
|
"eval_gen_len": 5.566, |
|
"eval_loss": 0.5837388038635254, |
|
"eval_rouge1": 27.0269, |
|
"eval_rouge2": 11.9442, |
|
"eval_rougeL": 27.0247, |
|
"eval_rougeLsum": 26.9566, |
|
"eval_runtime": 76.2944, |
|
"eval_samples_per_second": 13.107, |
|
"eval_steps_per_second": 4.378, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.2712634594487801e-05, |
|
"loss": 0.5079, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.2547993019197209e-05, |
|
"loss": 0.4905, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.2383351443906615e-05, |
|
"loss": 0.4939, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2218709868616023e-05, |
|
"loss": 0.5333, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_exact_match": 5.5, |
|
"eval_f1": 23.1489, |
|
"eval_gen_len": 5.764, |
|
"eval_loss": 0.5246403813362122, |
|
"eval_rouge1": 27.9369, |
|
"eval_rouge2": 12.2044, |
|
"eval_rougeL": 27.8886, |
|
"eval_rougeLsum": 27.9162, |
|
"eval_runtime": 76.3987, |
|
"eval_samples_per_second": 13.089, |
|
"eval_steps_per_second": 4.372, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.2054068293325432e-05, |
|
"loss": 0.5067, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.1889426718034838e-05, |
|
"loss": 0.4915, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.1724785142744246e-05, |
|
"loss": 0.4855, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.1560143567453654e-05, |
|
"loss": 0.5356, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_exact_match": 7.5, |
|
"eval_f1": 21.9492, |
|
"eval_gen_len": 5.628, |
|
"eval_loss": 0.559809684753418, |
|
"eval_rouge1": 27.8669, |
|
"eval_rouge2": 12.3269, |
|
"eval_rougeL": 27.8231, |
|
"eval_rougeLsum": 27.8187, |
|
"eval_runtime": 75.9003, |
|
"eval_samples_per_second": 13.175, |
|
"eval_steps_per_second": 4.401, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.1395501992163061e-05, |
|
"loss": 0.5376, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.123086041687247e-05, |
|
"loss": 0.5152, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1066218841581877e-05, |
|
"loss": 0.5355, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0901577266291283e-05, |
|
"loss": 0.5154, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_exact_match": 5.5, |
|
"eval_f1": 22.8791, |
|
"eval_gen_len": 5.859, |
|
"eval_loss": 0.5113182663917542, |
|
"eval_rouge1": 28.1362, |
|
"eval_rouge2": 12.2055, |
|
"eval_rougeL": 28.0813, |
|
"eval_rougeLsum": 28.0901, |
|
"eval_runtime": 78.3738, |
|
"eval_samples_per_second": 12.759, |
|
"eval_steps_per_second": 4.262, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.073693569100069e-05, |
|
"loss": 0.5391, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.05722941157101e-05, |
|
"loss": 0.4996, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.0407652540419508e-05, |
|
"loss": 0.5221, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0243010965128914e-05, |
|
"loss": 0.4974, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_exact_match": 7.7, |
|
"eval_f1": 22.1627, |
|
"eval_gen_len": 5.622, |
|
"eval_loss": 0.529927670955658, |
|
"eval_rouge1": 27.4181, |
|
"eval_rouge2": 11.8982, |
|
"eval_rougeL": 27.3595, |
|
"eval_rougeLsum": 27.362, |
|
"eval_runtime": 76.1366, |
|
"eval_samples_per_second": 13.134, |
|
"eval_steps_per_second": 4.387, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0078369389838322e-05, |
|
"loss": 0.4769, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.913727814547731e-06, |
|
"loss": 0.5019, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.749086239257137e-06, |
|
"loss": 0.4819, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.584444663966545e-06, |
|
"loss": 0.4368, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_exact_match": 4.9, |
|
"eval_f1": 23.2707, |
|
"eval_gen_len": 5.85, |
|
"eval_loss": 0.533755362033844, |
|
"eval_rouge1": 28.3591, |
|
"eval_rouge2": 12.5844, |
|
"eval_rougeL": 28.2897, |
|
"eval_rougeLsum": 28.3187, |
|
"eval_runtime": 77.3013, |
|
"eval_samples_per_second": 12.936, |
|
"eval_steps_per_second": 4.321, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.419803088675953e-06, |
|
"loss": 0.5444, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.255161513385359e-06, |
|
"loss": 0.4618, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.090519938094768e-06, |
|
"loss": 0.4942, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.925878362804176e-06, |
|
"loss": 0.4685, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_exact_match": 5.4, |
|
"eval_f1": 23.1173, |
|
"eval_gen_len": 5.868, |
|
"eval_loss": 0.5359737277030945, |
|
"eval_rouge1": 28.1371, |
|
"eval_rouge2": 12.7219, |
|
"eval_rougeL": 28.0627, |
|
"eval_rougeLsum": 28.1156, |
|
"eval_runtime": 77.0613, |
|
"eval_samples_per_second": 12.977, |
|
"eval_steps_per_second": 4.334, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.761236787513582e-06, |
|
"loss": 0.4914, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.59659521222299e-06, |
|
"loss": 0.519, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 8.4319536369324e-06, |
|
"loss": 0.4865, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.267312061641807e-06, |
|
"loss": 0.4889, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 5.8, |
|
"eval_f1": 23.0678, |
|
"eval_gen_len": 5.791, |
|
"eval_loss": 0.550064206123352, |
|
"eval_rouge1": 28.0394, |
|
"eval_rouge2": 12.3157, |
|
"eval_rougeL": 27.9714, |
|
"eval_rougeLsum": 27.9793, |
|
"eval_runtime": 76.4779, |
|
"eval_samples_per_second": 13.076, |
|
"eval_steps_per_second": 4.367, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.102670486351213e-06, |
|
"loss": 0.4684, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.938028911060621e-06, |
|
"loss": 0.4699, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.773387335770029e-06, |
|
"loss": 0.5216, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.608745760479437e-06, |
|
"loss": 0.4384, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_exact_match": 5.4, |
|
"eval_f1": 23.1485, |
|
"eval_gen_len": 5.799, |
|
"eval_loss": 0.5489073991775513, |
|
"eval_rouge1": 28.3405, |
|
"eval_rouge2": 12.6721, |
|
"eval_rougeL": 28.3062, |
|
"eval_rougeLsum": 28.3124, |
|
"eval_runtime": 76.6915, |
|
"eval_samples_per_second": 13.039, |
|
"eval_steps_per_second": 4.355, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.444104185188844e-06, |
|
"loss": 0.48, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.279462609898251e-06, |
|
"loss": 0.4582, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.11482103460766e-06, |
|
"loss": 0.4575, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.9501794593170666e-06, |
|
"loss": 0.4588, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_exact_match": 5.6, |
|
"eval_f1": 23.2198, |
|
"eval_gen_len": 5.809, |
|
"eval_loss": 0.5535086393356323, |
|
"eval_rouge1": 28.2308, |
|
"eval_rouge2": 12.4573, |
|
"eval_rougeL": 28.1886, |
|
"eval_rougeLsum": 28.1639, |
|
"eval_runtime": 76.7297, |
|
"eval_samples_per_second": 13.033, |
|
"eval_steps_per_second": 4.353, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.785537884026475e-06, |
|
"loss": 0.4737, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 6.620896308735882e-06, |
|
"loss": 0.4799, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.45625473344529e-06, |
|
"loss": 0.4545, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 6.2916131581546976e-06, |
|
"loss": 0.5076, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_exact_match": 6.7, |
|
"eval_f1": 22.6072, |
|
"eval_gen_len": 5.739, |
|
"eval_loss": 0.5515512824058533, |
|
"eval_rouge1": 28.0709, |
|
"eval_rouge2": 12.4522, |
|
"eval_rougeL": 27.9586, |
|
"eval_rougeLsum": 27.9825, |
|
"eval_runtime": 76.3124, |
|
"eval_samples_per_second": 13.104, |
|
"eval_steps_per_second": 4.377, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 6.126971582864105e-06, |
|
"loss": 0.442, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 5.962330007573513e-06, |
|
"loss": 0.4464, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 5.79768843228292e-06, |
|
"loss": 0.4756, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5.633046856992328e-06, |
|
"loss": 0.4467, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_exact_match": 5.2, |
|
"eval_f1": 22.9357, |
|
"eval_gen_len": 5.876, |
|
"eval_loss": 0.5473840832710266, |
|
"eval_rouge1": 28.2468, |
|
"eval_rouge2": 12.4971, |
|
"eval_rougeL": 28.1601, |
|
"eval_rougeLsum": 28.1898, |
|
"eval_runtime": 77.1004, |
|
"eval_samples_per_second": 12.97, |
|
"eval_steps_per_second": 4.332, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.4684052817017355e-06, |
|
"loss": 0.5136, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.303763706411143e-06, |
|
"loss": 0.4626, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.13912213112055e-06, |
|
"loss": 0.5138, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 4.974480555829959e-06, |
|
"loss": 0.4262, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_exact_match": 5.5, |
|
"eval_f1": 23.0092, |
|
"eval_gen_len": 5.883, |
|
"eval_loss": 0.5560488700866699, |
|
"eval_rouge1": 28.3505, |
|
"eval_rouge2": 12.633, |
|
"eval_rougeL": 28.2747, |
|
"eval_rougeLsum": 28.2859, |
|
"eval_runtime": 77.2929, |
|
"eval_samples_per_second": 12.938, |
|
"eval_steps_per_second": 4.321, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.809838980539366e-06, |
|
"loss": 0.4773, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 4.645197405248774e-06, |
|
"loss": 0.4813, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.480555829958181e-06, |
|
"loss": 0.4833, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.315914254667588e-06, |
|
"loss": 0.4812, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 5.5, |
|
"eval_f1": 23.0871, |
|
"eval_gen_len": 5.812, |
|
"eval_loss": 0.5406239032745361, |
|
"eval_rouge1": 28.1686, |
|
"eval_rouge2": 12.4512, |
|
"eval_rougeL": 28.1366, |
|
"eval_rougeLsum": 28.1438, |
|
"eval_runtime": 76.6212, |
|
"eval_samples_per_second": 13.051, |
|
"eval_steps_per_second": 4.359, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.151272679376997e-06, |
|
"loss": 0.4662, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.9866311040864035e-06, |
|
"loss": 0.4939, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.821989528795812e-06, |
|
"loss": 0.5221, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.6573479535052195e-06, |
|
"loss": 0.4243, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_exact_match": 5.2, |
|
"eval_f1": 22.7384, |
|
"eval_gen_len": 5.908, |
|
"eval_loss": 0.5526044964790344, |
|
"eval_rouge1": 28.0823, |
|
"eval_rouge2": 12.5204, |
|
"eval_rougeL": 28.0028, |
|
"eval_rougeLsum": 28.0533, |
|
"eval_runtime": 77.3998, |
|
"eval_samples_per_second": 12.92, |
|
"eval_steps_per_second": 4.315, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.492706378214627e-06, |
|
"loss": 0.4704, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.328064802924034e-06, |
|
"loss": 0.4731, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.163423227633442e-06, |
|
"loss": 0.4775, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.9987816523428496e-06, |
|
"loss": 0.4828, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_exact_match": 6.7, |
|
"eval_f1": 22.2536, |
|
"eval_gen_len": 5.75, |
|
"eval_loss": 0.5606855154037476, |
|
"eval_rouge1": 27.9073, |
|
"eval_rouge2": 12.4149, |
|
"eval_rougeL": 27.8389, |
|
"eval_rougeLsum": 27.8483, |
|
"eval_runtime": 76.3805, |
|
"eval_samples_per_second": 13.092, |
|
"eval_steps_per_second": 4.373, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.8341400770522574e-06, |
|
"loss": 0.4479, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.669498501761665e-06, |
|
"loss": 0.4923, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.5048569264710725e-06, |
|
"loss": 0.5143, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.3402153511804802e-06, |
|
"loss": 0.4827, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_exact_match": 4.8, |
|
"eval_f1": 23.1203, |
|
"eval_gen_len": 5.883, |
|
"eval_loss": 0.5355603694915771, |
|
"eval_rouge1": 28.5515, |
|
"eval_rouge2": 12.5974, |
|
"eval_rougeL": 28.4917, |
|
"eval_rougeLsum": 28.4884, |
|
"eval_runtime": 77.1315, |
|
"eval_samples_per_second": 12.965, |
|
"eval_steps_per_second": 4.33, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.175573775889888e-06, |
|
"loss": 0.453, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0109322005992957e-06, |
|
"loss": 0.4689, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.846290625308703e-06, |
|
"loss": 0.4628, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6816490500181106e-06, |
|
"loss": 0.4599, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_exact_match": 6.0, |
|
"eval_f1": 22.775, |
|
"eval_gen_len": 5.75, |
|
"eval_loss": 0.5486857891082764, |
|
"eval_rouge1": 28.1728, |
|
"eval_rouge2": 12.3968, |
|
"eval_rougeL": 28.1105, |
|
"eval_rougeLsum": 28.1316, |
|
"eval_runtime": 76.2649, |
|
"eval_samples_per_second": 13.112, |
|
"eval_steps_per_second": 4.379, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.5170074747275183e-06, |
|
"loss": 0.4783, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.3523658994369257e-06, |
|
"loss": 0.4793, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.1877243241463334e-06, |
|
"loss": 0.4457, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.0230827488557412e-06, |
|
"loss": 0.4505, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_exact_match": 5.3, |
|
"eval_f1": 23.063, |
|
"eval_gen_len": 5.832, |
|
"eval_loss": 0.5459786057472229, |
|
"eval_rouge1": 28.381, |
|
"eval_rouge2": 12.6567, |
|
"eval_rougeL": 28.3519, |
|
"eval_rougeLsum": 28.3424, |
|
"eval_runtime": 76.7382, |
|
"eval_samples_per_second": 13.031, |
|
"eval_steps_per_second": 4.352, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 8.584411735651487e-07, |
|
"loss": 0.4594, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.937995982745563e-07, |
|
"loss": 0.4719, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.291580229839639e-07, |
|
"loss": 0.4538, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.6451644769337155e-07, |
|
"loss": 0.4741, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_exact_match": 5.3, |
|
"eval_f1": 23.0565, |
|
"eval_gen_len": 5.819, |
|
"eval_loss": 0.5445623993873596, |
|
"eval_rouge1": 28.2709, |
|
"eval_rouge2": 12.4988, |
|
"eval_rougeL": 28.1962, |
|
"eval_rougeLsum": 28.2203, |
|
"eval_runtime": 76.5919, |
|
"eval_samples_per_second": 13.056, |
|
"eval_steps_per_second": 4.361, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.9987487240277914e-07, |
|
"loss": 0.441, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.5233297112186775e-08, |
|
"loss": 0.4993, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 91107, |
|
"total_flos": 1.4035529182858445e+17, |
|
"train_loss": 0.5681342689636171, |
|
"train_runtime": 25047.2432, |
|
"train_samples_per_second": 10.912, |
|
"train_steps_per_second": 3.637 |
|
} |
|
], |
|
"max_steps": 91107, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.4035529182858445e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|