{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.996592844974447,
  "global_step": 88000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.0002985397907033341,
      "loss": 4.2075,
      "step": 500
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.00029707958140666826,
      "loss": 3.7047,
      "step": 1000
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002956193721100024,
      "loss": 3.5253,
      "step": 1500
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.00029415916281333654,
      "loss": 3.4404,
      "step": 2000
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002926989535166707,
      "loss": 3.3437,
      "step": 2500
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00029123874422000483,
      "loss": 3.3357,
      "step": 3000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.000289778534923339,
      "loss": 3.275,
      "step": 3500
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.00028831832562667317,
      "loss": 3.2801,
      "step": 4000
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002868581163300073,
      "loss": 3.2396,
      "step": 4500
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00028539790703334145,
      "loss": 3.2389,
      "step": 5000
    },
    {
      "epoch": 0.34,
      "eval_gen_len": 15.0482,
      "eval_loss": 2.7936947345733643,
      "eval_rouge1": 26.3204,
      "eval_rouge2": 9.4077,
      "eval_rougeL": 23.6598,
      "eval_rougeLsum": 23.8655,
      "eval_runtime": 530.9697,
      "eval_samples_per_second": 18.837,
      "eval_steps_per_second": 2.356,
      "step": 5000
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.00028393769773667557,
      "loss": 3.2308,
      "step": 5500
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.00028247748844000974,
      "loss": 3.1836,
      "step": 6000
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.00028101727914334385,
      "loss": 3.2004,
      "step": 6500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.00027955706984667797,
      "loss": 3.1561,
      "step": 7000
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.00027809686055001214,
      "loss": 3.1582,
      "step": 7500
    },
    {
      "epoch": 0.55,
      "learning_rate": 0.00027663665125334625,
      "loss": 3.1249,
      "step": 8000
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002751764419566804,
      "loss": 3.1236,
      "step": 8500
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.0002737162326600146,
      "loss": 3.0904,
      "step": 9000
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0002722560233633487,
      "loss": 3.1179,
      "step": 9500
    },
    {
      "epoch": 0.68,
      "learning_rate": 0.0002707958140666829,
      "loss": 3.0741,
      "step": 10000
    },
    {
      "epoch": 0.68,
      "eval_gen_len": 14.4006,
      "eval_loss": 2.6971848011016846,
      "eval_rouge1": 26.9258,
      "eval_rouge2": 9.7445,
      "eval_rougeL": 24.2045,
      "eval_rougeLsum": 24.4155,
      "eval_runtime": 528.5359,
      "eval_samples_per_second": 18.924,
      "eval_steps_per_second": 2.367,
      "step": 10000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.00026933560477001705,
      "loss": 3.0814,
      "step": 10500
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00026787539547335116,
      "loss": 3.1,
      "step": 11000
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.00026641518617668533,
      "loss": 3.0672,
      "step": 11500
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.00026495497688001945,
      "loss": 3.0728,
      "step": 12000
    },
    {
      "epoch": 0.85,
      "learning_rate": 0.0002634947675833536,
      "loss": 3.0516,
      "step": 12500
    },
    {
      "epoch": 0.89,
      "learning_rate": 0.00026203455828668773,
      "loss": 3.0587,
      "step": 13000
    },
    {
      "epoch": 0.92,
      "learning_rate": 0.00026057434899002185,
      "loss": 3.061,
      "step": 13500
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.000259114139693356,
      "loss": 3.0404,
      "step": 14000
    },
    {
      "epoch": 0.99,
      "learning_rate": 0.00025765393039669013,
      "loss": 3.0352,
      "step": 14500
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002561937211000243,
      "loss": 2.9119,
      "step": 15000
    },
    {
      "epoch": 1.02,
      "eval_gen_len": 15.1769,
      "eval_loss": 2.641094923019409,
      "eval_rouge1": 27.6132,
      "eval_rouge2": 10.2843,
      "eval_rougeL": 24.6804,
      "eval_rougeLsum": 24.9483,
      "eval_runtime": 519.8172,
      "eval_samples_per_second": 19.241,
      "eval_steps_per_second": 2.407,
      "step": 15000
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.00025473351180335847,
      "loss": 2.8945,
      "step": 15500
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002532733025066926,
      "loss": 2.8809,
      "step": 16000
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.00025181309321002676,
      "loss": 2.8548,
      "step": 16500
    },
    {
      "epoch": 1.16,
      "learning_rate": 0.0002503528839133609,
      "loss": 2.8613,
      "step": 17000
    },
    {
      "epoch": 1.19,
      "learning_rate": 0.00024889267461669504,
      "loss": 2.8463,
      "step": 17500
    },
    {
      "epoch": 1.23,
      "learning_rate": 0.0002474324653200292,
      "loss": 2.8409,
      "step": 18000
    },
    {
      "epoch": 1.26,
      "learning_rate": 0.0002459722560233633,
      "loss": 2.8519,
      "step": 18500
    },
    {
      "epoch": 1.29,
      "learning_rate": 0.0002445120467266975,
      "loss": 2.8457,
      "step": 19000
    },
    {
      "epoch": 1.33,
      "learning_rate": 0.0002430518374300316,
      "loss": 2.8603,
      "step": 19500
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00024159162813336578,
      "loss": 2.8272,
      "step": 20000
    },
    {
      "epoch": 1.36,
      "eval_gen_len": 15.1337,
      "eval_loss": 2.596390962600708,
      "eval_rouge1": 27.5723,
      "eval_rouge2": 10.131,
      "eval_rougeL": 24.6773,
      "eval_rougeLsum": 24.9496,
      "eval_runtime": 529.2072,
      "eval_samples_per_second": 18.9,
      "eval_steps_per_second": 2.364,
      "step": 20000
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.0002401314188366999,
      "loss": 2.8576,
      "step": 20500
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.00023867120954003404,
      "loss": 2.8396,
      "step": 21000
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.0002372110002433682,
      "loss": 2.838,
      "step": 21500
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.00023575079094670232,
      "loss": 2.842,
      "step": 22000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002342905816500365,
      "loss": 2.8255,
      "step": 22500
    },
    {
      "epoch": 1.57,
      "learning_rate": 0.0002328303723533706,
      "loss": 2.8173,
      "step": 23000
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.00023137016305670478,
      "loss": 2.8115,
      "step": 23500
    },
    {
      "epoch": 1.64,
      "learning_rate": 0.00022990995376003892,
      "loss": 2.8148,
      "step": 24000
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00022844974446337306,
      "loss": 2.8155,
      "step": 24500
    },
    {
      "epoch": 1.7,
      "learning_rate": 0.0002269895351667072,
      "loss": 2.8135,
      "step": 25000
    },
    {
      "epoch": 1.7,
      "eval_gen_len": 15.4393,
      "eval_loss": 2.5782437324523926,
      "eval_rouge1": 28.7444,
      "eval_rouge2": 10.8958,
      "eval_rougeL": 25.6842,
      "eval_rougeLsum": 25.9691,
      "eval_runtime": 520.1126,
      "eval_samples_per_second": 19.23,
      "eval_steps_per_second": 2.405,
      "step": 25000
    },
    {
      "epoch": 1.74,
      "learning_rate": 0.00022552932587004137,
      "loss": 2.8161,
      "step": 25500
    },
    {
      "epoch": 1.77,
      "learning_rate": 0.0002240691165733755,
      "loss": 2.8132,
      "step": 26000
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.00022260890727670966,
      "loss": 2.8192,
      "step": 26500
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.00022114869798004378,
      "loss": 2.8098,
      "step": 27000
    },
    {
      "epoch": 1.87,
      "learning_rate": 0.00021968848868337792,
      "loss": 2.776,
      "step": 27500
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.0002182282793867121,
      "loss": 2.7931,
      "step": 28000
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.0002167680700900462,
      "loss": 2.7675,
      "step": 28500
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.00021530786079338037,
      "loss": 2.788,
      "step": 29000
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.0002138476514967145,
      "loss": 2.7194,
      "step": 29500
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.00021238744220004866,
      "loss": 2.6235,
      "step": 30000
    },
    {
      "epoch": 2.04,
      "eval_gen_len": 14.9003,
      "eval_loss": 2.551361322402954,
      "eval_rouge1": 29.1138,
      "eval_rouge2": 11.1582,
      "eval_rougeL": 25.9524,
      "eval_rougeLsum": 26.2659,
      "eval_runtime": 524.2558,
      "eval_samples_per_second": 19.078,
      "eval_steps_per_second": 2.386,
      "step": 30000
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.0002109272329033828,
      "loss": 2.6189,
      "step": 30500
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.00020946702360671694,
      "loss": 2.6318,
      "step": 31000
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.00020800681431005108,
      "loss": 2.6356,
      "step": 31500
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.00020654660501338525,
      "loss": 2.6379,
      "step": 32000
    },
    {
      "epoch": 2.21,
      "learning_rate": 0.00020508639571671937,
      "loss": 2.6018,
      "step": 32500
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.00020362618642005354,
      "loss": 2.6223,
      "step": 33000
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.00020216597712338765,
      "loss": 2.6421,
      "step": 33500
    },
    {
      "epoch": 2.32,
      "learning_rate": 0.0002007057678267218,
      "loss": 2.6397,
      "step": 34000
    },
    {
      "epoch": 2.35,
      "learning_rate": 0.00019924555853005597,
      "loss": 2.6401,
      "step": 34500
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.00019778534923339008,
      "loss": 2.641,
      "step": 35000
    },
    {
      "epoch": 2.39,
      "eval_gen_len": 14.8827,
      "eval_loss": 2.526247978210449,
      "eval_rouge1": 29.1569,
      "eval_rouge2": 11.2473,
      "eval_rougeL": 26.0977,
      "eval_rougeLsum": 26.4149,
      "eval_runtime": 516.1021,
      "eval_samples_per_second": 19.38,
      "eval_steps_per_second": 2.424,
      "step": 35000
    },
    {
      "epoch": 2.42,
      "learning_rate": 0.00019632513993672425,
      "loss": 2.6228,
      "step": 35500
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.0001948649306400584,
      "loss": 2.6487,
      "step": 36000
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.00019340472134339254,
      "loss": 2.6373,
      "step": 36500
    },
    {
      "epoch": 2.52,
      "learning_rate": 0.00019194451204672668,
      "loss": 2.6493,
      "step": 37000
    },
    {
      "epoch": 2.56,
      "learning_rate": 0.00019048430275006085,
      "loss": 2.6378,
      "step": 37500
    },
    {
      "epoch": 2.59,
      "learning_rate": 0.00018902409345339496,
      "loss": 2.6264,
      "step": 38000
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.00018756388415672913,
      "loss": 2.6223,
      "step": 38500
    },
    {
      "epoch": 2.66,
      "learning_rate": 0.00018610367486006325,
      "loss": 2.6156,
      "step": 39000
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.00018464346556339742,
      "loss": 2.6221,
      "step": 39500
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.00018318325626673156,
      "loss": 2.6342,
      "step": 40000
    },
    {
      "epoch": 2.73,
      "eval_gen_len": 14.8778,
      "eval_loss": 2.5141656398773193,
      "eval_rouge1": 29.2214,
      "eval_rouge2": 11.1812,
      "eval_rougeL": 26.1029,
      "eval_rougeLsum": 26.3802,
      "eval_runtime": 526.5352,
      "eval_samples_per_second": 18.996,
      "eval_steps_per_second": 2.376,
      "step": 40000
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.00018172304697006568,
      "loss": 2.6198,
      "step": 40500
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.00018026283767339985,
      "loss": 2.6206,
      "step": 41000
    },
    {
      "epoch": 2.83,
      "learning_rate": 0.00017880262837673396,
      "loss": 2.6106,
      "step": 41500
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.00017734241908006813,
      "loss": 2.6162,
      "step": 42000
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.00017588220978340227,
      "loss": 2.6218,
      "step": 42500
    },
    {
      "epoch": 2.93,
      "learning_rate": 0.00017442200048673642,
      "loss": 2.6045,
      "step": 43000
    },
    {
      "epoch": 2.96,
      "learning_rate": 0.00017296179119007056,
      "loss": 2.579,
      "step": 43500
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.00017150158189340473,
      "loss": 2.6103,
      "step": 44000
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.00017004137259673884,
      "loss": 2.454,
      "step": 44500
    },
    {
      "epoch": 3.07,
      "learning_rate": 0.000168581163300073,
      "loss": 2.4637,
      "step": 45000
    },
    {
      "epoch": 3.07,
      "eval_gen_len": 15.2305,
      "eval_loss": 2.511312246322632,
      "eval_rouge1": 29.6711,
      "eval_rouge2": 11.5,
      "eval_rougeL": 26.4325,
      "eval_rougeLsum": 26.7634,
      "eval_runtime": 524.3633,
      "eval_samples_per_second": 19.075,
      "eval_steps_per_second": 2.386,
      "step": 45000
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.00016712095400340713,
      "loss": 2.4633,
      "step": 45500
    },
    {
      "epoch": 3.13,
      "learning_rate": 0.0001656607447067413,
      "loss": 2.4437,
      "step": 46000
    },
    {
      "epoch": 3.17,
      "learning_rate": 0.00016420053541007544,
      "loss": 2.4744,
      "step": 46500
    },
    {
      "epoch": 3.2,
      "learning_rate": 0.00016274032611340956,
      "loss": 2.4618,
      "step": 47000
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.00016128011681674372,
      "loss": 2.4945,
      "step": 47500
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.00015981990752007784,
      "loss": 2.4747,
      "step": 48000
    },
    {
      "epoch": 3.3,
      "learning_rate": 0.000158359698223412,
      "loss": 2.4632,
      "step": 48500
    },
    {
      "epoch": 3.34,
      "learning_rate": 0.00015689948892674615,
      "loss": 2.4892,
      "step": 49000
    },
    {
      "epoch": 3.37,
      "learning_rate": 0.0001554392796300803,
      "loss": 2.4389,
      "step": 49500
    },
    {
      "epoch": 3.41,
      "learning_rate": 0.00015397907033341444,
      "loss": 2.5028,
      "step": 50000
    },
    {
      "epoch": 3.41,
      "eval_gen_len": 15.2348,
      "eval_loss": 2.494405746459961,
      "eval_rouge1": 29.8317,
      "eval_rouge2": 11.5675,
      "eval_rougeL": 26.4741,
      "eval_rougeLsum": 26.7943,
      "eval_runtime": 526.1631,
      "eval_samples_per_second": 19.009,
      "eval_steps_per_second": 2.378,
      "step": 50000
    },
    {
      "epoch": 3.44,
      "learning_rate": 0.0001525188610367486,
      "loss": 2.457,
      "step": 50500
    },
    {
      "epoch": 3.48,
      "learning_rate": 0.00015105865174008272,
      "loss": 2.4586,
      "step": 51000
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.00014959844244341686,
      "loss": 2.4731,
      "step": 51500
    },
    {
      "epoch": 3.54,
      "learning_rate": 0.000148138233146751,
      "loss": 2.492,
      "step": 52000
    },
    {
      "epoch": 3.58,
      "learning_rate": 0.00014667802385008518,
      "loss": 2.485,
      "step": 52500
    },
    {
      "epoch": 3.61,
      "learning_rate": 0.00014521781455341932,
      "loss": 2.4679,
      "step": 53000
    },
    {
      "epoch": 3.65,
      "learning_rate": 0.00014375760525675346,
      "loss": 2.4675,
      "step": 53500
    },
    {
      "epoch": 3.68,
      "learning_rate": 0.0001422973959600876,
      "loss": 2.4659,
      "step": 54000
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.00014083718666342175,
      "loss": 2.472,
      "step": 54500
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.0001393769773667559,
      "loss": 2.4507,
      "step": 55000
    },
    {
      "epoch": 3.75,
      "eval_gen_len": 14.9608,
      "eval_loss": 2.479571580886841,
      "eval_rouge1": 29.8158,
      "eval_rouge2": 11.6394,
      "eval_rougeL": 26.5371,
      "eval_rougeLsum": 26.8625,
      "eval_runtime": 518.7283,
      "eval_samples_per_second": 19.282,
      "eval_steps_per_second": 2.412,
      "step": 55000
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.00013791676807009003,
      "loss": 2.489,
      "step": 55500
    },
    {
      "epoch": 3.82,
      "learning_rate": 0.00013645655877342417,
      "loss": 2.4626,
      "step": 56000
    },
    {
      "epoch": 3.85,
      "learning_rate": 0.00013499634947675834,
      "loss": 2.4534,
      "step": 56500
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.00013353614018009246,
      "loss": 2.4664,
      "step": 57000
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.0001320759308834266,
      "loss": 2.4927,
      "step": 57500
    },
    {
      "epoch": 3.95,
      "learning_rate": 0.00013061572158676074,
      "loss": 2.4654,
      "step": 58000
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.00012915551229009489,
      "loss": 2.48,
      "step": 58500
    },
    {
      "epoch": 4.02,
      "learning_rate": 0.00012769530299342906,
      "loss": 2.3542,
      "step": 59000
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.0001262350936967632,
      "loss": 2.3249,
      "step": 59500
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.00012477488440009734,
      "loss": 2.3282,
      "step": 60000
    },
    {
      "epoch": 4.09,
      "eval_gen_len": 14.9966,
      "eval_loss": 2.4929299354553223,
      "eval_rouge1": 29.8621,
      "eval_rouge2": 11.5076,
      "eval_rougeL": 26.5598,
      "eval_rougeLsum": 26.9032,
      "eval_runtime": 525.8633,
      "eval_samples_per_second": 19.02,
      "eval_steps_per_second": 2.379,
      "step": 60000
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.00012331467510343148,
      "loss": 2.3419,
      "step": 60500
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.00012185446580676563,
      "loss": 2.3247,
      "step": 61000
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.00012039425651009977,
      "loss": 2.3349,
      "step": 61500
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.00011893404721343392,
      "loss": 2.3203,
      "step": 62000
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.00011747383791676807,
      "loss": 2.3494,
      "step": 62500
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.00011601362862010221,
      "loss": 2.3575,
      "step": 63000
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.00011455341932343635,
      "loss": 2.343,
      "step": 63500
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.00011309321002677048,
      "loss": 2.3609,
      "step": 64000
    },
    {
      "epoch": 4.4,
      "learning_rate": 0.00011163300073010464,
      "loss": 2.3294,
      "step": 64500
    },
    {
      "epoch": 4.43,
      "learning_rate": 0.00011017279143343878,
      "loss": 2.3218,
      "step": 65000
    },
    {
      "epoch": 4.43,
      "eval_gen_len": 15.2109,
      "eval_loss": 2.473184585571289,
      "eval_rouge1": 30.1018,
      "eval_rouge2": 11.8166,
      "eval_rougeL": 26.72,
      "eval_rougeLsum": 27.0393,
      "eval_runtime": 525.9361,
      "eval_samples_per_second": 19.018,
      "eval_steps_per_second": 2.379,
      "step": 65000
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.00010871258213677292,
      "loss": 2.3574,
      "step": 65500
    },
    {
      "epoch": 4.5,
      "learning_rate": 0.00010725237284010706,
      "loss": 2.3396,
      "step": 66000
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.00010579216354344122,
      "loss": 2.3742,
      "step": 66500
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.00010433195424677536,
      "loss": 2.3382,
      "step": 67000
    },
    {
      "epoch": 4.6,
      "learning_rate": 0.0001028717449501095,
      "loss": 2.3518,
      "step": 67500
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.00010141153565344366,
      "loss": 2.3659,
      "step": 68000
    },
    {
      "epoch": 4.67,
      "learning_rate": 9.99513263567778e-05,
      "loss": 2.3487,
      "step": 68500
    },
    {
      "epoch": 4.7,
      "learning_rate": 9.849111706011195e-05,
      "loss": 2.3337,
      "step": 69000
    },
    {
      "epoch": 4.74,
      "learning_rate": 9.703090776344609e-05,
      "loss": 2.3426,
      "step": 69500
    },
    {
      "epoch": 4.77,
      "learning_rate": 9.557069846678024e-05,
      "loss": 2.3321,
      "step": 70000
    },
    {
      "epoch": 4.77,
      "eval_gen_len": 14.7861,
      "eval_loss": 2.462064266204834,
      "eval_rouge1": 30.3513,
      "eval_rouge2": 11.9542,
      "eval_rougeL": 27.074,
      "eval_rougeLsum": 27.39,
      "eval_runtime": 529.0668,
      "eval_samples_per_second": 18.905,
      "eval_steps_per_second": 2.365,
      "step": 70000
    },
    {
      "epoch": 4.8,
      "learning_rate": 9.411048917011437e-05,
      "loss": 2.3369,
      "step": 70500
    },
    {
      "epoch": 4.84,
      "learning_rate": 9.265027987344852e-05,
      "loss": 2.3536,
      "step": 71000
    },
    {
      "epoch": 4.87,
      "learning_rate": 9.119007057678266e-05,
      "loss": 2.3364,
      "step": 71500
    },
    {
      "epoch": 4.91,
      "learning_rate": 8.97298612801168e-05,
      "loss": 2.3412,
      "step": 72000
    },
    {
      "epoch": 4.94,
      "learning_rate": 8.826965198345096e-05,
      "loss": 2.3364,
      "step": 72500
    },
    {
      "epoch": 4.97,
      "learning_rate": 8.68094426867851e-05,
      "loss": 2.3605,
      "step": 73000
    },
    {
      "epoch": 5.01,
      "learning_rate": 8.534923339011924e-05,
      "loss": 2.3124,
      "step": 73500
    },
    {
      "epoch": 5.04,
      "learning_rate": 8.388902409345338e-05,
      "loss": 2.2233,
      "step": 74000
    },
    {
      "epoch": 5.08,
      "learning_rate": 8.242881479678754e-05,
      "loss": 2.2063,
      "step": 74500
    },
    {
      "epoch": 5.11,
      "learning_rate": 8.096860550012168e-05,
      "loss": 2.2197,
      "step": 75000
    },
    {
      "epoch": 5.11,
      "eval_gen_len": 14.9783,
      "eval_loss": 2.4723336696624756,
      "eval_rouge1": 30.3337,
      "eval_rouge2": 11.9362,
      "eval_rougeL": 27.0161,
      "eval_rougeLsum": 27.365,
      "eval_runtime": 522.7349,
      "eval_samples_per_second": 19.134,
      "eval_steps_per_second": 2.393,
      "step": 75000
    },
    {
      "epoch": 5.14,
      "learning_rate": 7.950839620345582e-05,
      "loss": 2.2218,
      "step": 75500
    },
    {
      "epoch": 5.18,
      "learning_rate": 7.804818690678997e-05,
      "loss": 2.2415,
      "step": 76000
    },
    {
      "epoch": 5.21,
      "learning_rate": 7.658797761012412e-05,
      "loss": 2.2132,
      "step": 76500
    },
    {
      "epoch": 5.25,
      "learning_rate": 7.512776831345825e-05,
      "loss": 2.2604,
      "step": 77000
    },
    {
      "epoch": 5.28,
      "learning_rate": 7.366755901679241e-05,
      "loss": 2.2573,
      "step": 77500
    },
    {
      "epoch": 5.32,
      "learning_rate": 7.220734972012655e-05,
      "loss": 2.2291,
      "step": 78000
    },
    {
      "epoch": 5.35,
      "learning_rate": 7.074714042346069e-05,
      "loss": 2.2489,
      "step": 78500
    },
    {
      "epoch": 5.38,
      "learning_rate": 6.928693112679484e-05,
      "loss": 2.225,
      "step": 79000
    },
    {
      "epoch": 5.42,
      "learning_rate": 6.782672183012898e-05,
      "loss": 2.2077,
      "step": 79500
    },
    {
      "epoch": 5.45,
      "learning_rate": 6.636651253346312e-05,
      "loss": 2.2273,
      "step": 80000
    },
    {
      "epoch": 5.45,
      "eval_gen_len": 14.8784,
      "eval_loss": 2.4681761264801025,
      "eval_rouge1": 30.4217,
      "eval_rouge2": 12.0307,
      "eval_rougeL": 27.11,
      "eval_rougeLsum": 27.4155,
      "eval_runtime": 524.8636,
      "eval_samples_per_second": 19.056,
      "eval_steps_per_second": 2.383,
      "step": 80000
    },
    {
      "epoch": 5.49,
      "learning_rate": 6.490630323679726e-05,
      "loss": 2.2231,
      "step": 80500
    },
    {
      "epoch": 5.52,
      "learning_rate": 6.344609394013142e-05,
      "loss": 2.2679,
      "step": 81000
    },
    {
      "epoch": 5.55,
      "learning_rate": 6.198588464346556e-05,
      "loss": 2.2423,
      "step": 81500
    },
    {
      "epoch": 5.59,
      "learning_rate": 6.05256753467997e-05,
      "loss": 2.2195,
      "step": 82000
    },
    {
      "epoch": 5.62,
      "learning_rate": 5.9065466050133846e-05,
      "loss": 2.2439,
      "step": 82500
    },
    {
      "epoch": 5.66,
      "learning_rate": 5.760525675346799e-05,
      "loss": 2.2261,
      "step": 83000
    },
    {
      "epoch": 5.69,
      "learning_rate": 5.614504745680214e-05,
      "loss": 2.2562,
      "step": 83500
    },
    {
      "epoch": 5.72,
      "learning_rate": 5.468483816013628e-05,
      "loss": 2.2477,
      "step": 84000
    },
    {
      "epoch": 5.76,
      "learning_rate": 5.322462886347043e-05,
      "loss": 2.2257,
      "step": 84500
    },
    {
      "epoch": 5.79,
      "learning_rate": 5.176441956680457e-05,
      "loss": 2.2407,
      "step": 85000
    },
    {
      "epoch": 5.79,
      "eval_gen_len": 14.8097,
      "eval_loss": 2.4504895210266113,
      "eval_rouge1": 30.4682,
      "eval_rouge2": 11.9828,
      "eval_rougeL": 27.0725,
      "eval_rougeLsum": 27.3849,
      "eval_runtime": 526.291,
      "eval_samples_per_second": 19.005,
      "eval_steps_per_second": 2.377,
      "step": 85000
    },
    {
      "epoch": 5.83,
      "learning_rate": 5.0304210270138715e-05,
      "loss": 2.2334,
      "step": 85500
    },
    {
      "epoch": 5.86,
      "learning_rate": 4.884400097347286e-05,
      "loss": 2.2468,
      "step": 86000
    },
    {
      "epoch": 5.89,
      "learning_rate": 4.7383791676807007e-05,
      "loss": 2.2107,
      "step": 86500
    },
    {
      "epoch": 5.93,
      "learning_rate": 4.592358238014115e-05,
      "loss": 2.2534,
      "step": 87000
    },
    {
      "epoch": 5.96,
      "learning_rate": 4.44633730834753e-05,
      "loss": 2.2311,
      "step": 87500
    },
    {
      "epoch": 6.0,
      "learning_rate": 4.300316378680944e-05,
      "loss": 2.2225,
      "step": 88000
    }
  ],
  "max_steps": 102725,
  "num_train_epochs": 7,
  "total_flos": 2.7215384355038822e+17,
  "trial_name": null,
  "trial_params": null
}