results / trainer_state.json
nlparabic's picture
End of training
76bdd9c verified
raw
history blame
11.2 kB
{
"best_metric": 1.9084105491638184,
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results/checkpoint-8500",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 9220,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.567398119122257,
"grad_norm": 1.353641152381897,
"learning_rate": 5e-05,
"loss": 3.359,
"step": 500
},
{
"epoch": 1.567398119122257,
"eval_bleu": 0.11424038411303619,
"eval_loss": 3.128293514251709,
"eval_rouge1": 0.3297614987151056,
"eval_rouge2": 0.08429294540985294,
"eval_rougeL": 0.2561476738686219,
"eval_runtime": 26.8133,
"eval_samples_per_second": 31.589,
"eval_steps_per_second": 3.953,
"step": 500
},
{
"epoch": 3.134796238244514,
"grad_norm": 1.156111717224121,
"learning_rate": 2.71689497716895e-05,
"loss": 2.9208,
"step": 1000
},
{
"epoch": 3.134796238244514,
"eval_bleu": 0.1490666503828191,
"eval_loss": 2.729825496673584,
"eval_rouge1": 0.40409071928626966,
"eval_rouge2": 0.14297878002377568,
"eval_rougeL": 0.34083403761346187,
"eval_runtime": 27.234,
"eval_samples_per_second": 31.101,
"eval_steps_per_second": 3.892,
"step": 1000
},
{
"epoch": 4.702194357366771,
"grad_norm": 1.1165863275527954,
"learning_rate": 4.337899543378996e-06,
"loss": 2.619,
"step": 1500
},
{
"epoch": 4.702194357366771,
"eval_bleu": 0.16068905926811505,
"eval_loss": 2.6229476928710938,
"eval_rouge1": 0.4264320027787866,
"eval_rouge2": 0.1630682859845051,
"eval_rougeL": 0.367472815476786,
"eval_runtime": 27.3027,
"eval_samples_per_second": 31.023,
"eval_steps_per_second": 3.882,
"step": 1500
},
{
"epoch": 4.3383947939262475,
"grad_norm": 1.10550856590271,
"learning_rate": 4.139908256880734e-05,
"loss": 2.4047,
"step": 2000
},
{
"epoch": 4.3383947939262475,
"eval_bleu": 0.27212534220096674,
"eval_loss": 2.200192451477051,
"eval_rouge1": 0.49764917064550795,
"eval_rouge2": 0.25417403674525624,
"eval_rougeL": 0.4505978761161964,
"eval_runtime": 29.8301,
"eval_samples_per_second": 31.009,
"eval_steps_per_second": 3.889,
"step": 2000
},
{
"epoch": 5.422993492407809,
"grad_norm": 1.0486189126968384,
"learning_rate": 3.8532110091743125e-05,
"loss": 2.19,
"step": 2500
},
{
"epoch": 5.422993492407809,
"eval_bleu": 0.2853635265097057,
"eval_loss": 2.099168539047241,
"eval_rouge1": 0.5205238075842558,
"eval_rouge2": 0.27883621341002174,
"eval_rougeL": 0.4772785679427928,
"eval_runtime": 29.5017,
"eval_samples_per_second": 31.354,
"eval_steps_per_second": 3.932,
"step": 2500
},
{
"epoch": 6.507592190889371,
"grad_norm": 1.0022239685058594,
"learning_rate": 3.56651376146789e-05,
"loss": 2.0473,
"step": 3000
},
{
"epoch": 6.507592190889371,
"eval_bleu": 0.29294689624288234,
"eval_loss": 2.0362119674682617,
"eval_rouge1": 0.5380910185587349,
"eval_rouge2": 0.29647105961235576,
"eval_rougeL": 0.49649873151947865,
"eval_runtime": 29.6658,
"eval_samples_per_second": 31.181,
"eval_steps_per_second": 3.91,
"step": 3000
},
{
"epoch": 7.592190889370933,
"grad_norm": 1.1853405237197876,
"learning_rate": 3.2798165137614676e-05,
"loss": 1.9397,
"step": 3500
},
{
"epoch": 7.592190889370933,
"eval_bleu": 0.2996126116957466,
"eval_loss": 1.9933106899261475,
"eval_rouge1": 0.5494053286639744,
"eval_rouge2": 0.31025003697020603,
"eval_rougeL": 0.5101736274334897,
"eval_runtime": 29.6088,
"eval_samples_per_second": 31.241,
"eval_steps_per_second": 3.918,
"step": 3500
},
{
"epoch": 8.676789587852495,
"grad_norm": 1.1255462169647217,
"learning_rate": 2.9931192660550462e-05,
"loss": 1.857,
"step": 4000
},
{
"epoch": 8.676789587852495,
"eval_bleu": 0.30241485912380783,
"eval_loss": 1.9647237062454224,
"eval_rouge1": 0.5597611557009092,
"eval_rouge2": 0.3191422306947157,
"eval_rougeL": 0.5202653323875917,
"eval_runtime": 29.9377,
"eval_samples_per_second": 30.897,
"eval_steps_per_second": 3.875,
"step": 4000
},
{
"epoch": 9.761388286334057,
"grad_norm": 1.1697229146957397,
"learning_rate": 2.7064220183486238e-05,
"loss": 1.784,
"step": 4500
},
{
"epoch": 9.761388286334057,
"eval_bleu": 0.3061719577143718,
"eval_loss": 1.9443068504333496,
"eval_rouge1": 0.567492271856554,
"eval_rouge2": 0.3269182124324805,
"eval_rougeL": 0.5278573882748132,
"eval_runtime": 29.751,
"eval_samples_per_second": 31.091,
"eval_steps_per_second": 3.899,
"step": 4500
},
{
"epoch": 10.845986984815617,
"grad_norm": 1.070591926574707,
"learning_rate": 2.419724770642202e-05,
"loss": 1.7239,
"step": 5000
},
{
"epoch": 10.845986984815617,
"eval_bleu": 0.309858394526436,
"eval_loss": 1.931990385055542,
"eval_rouge1": 0.5723606535196859,
"eval_rouge2": 0.3338521436125379,
"eval_rougeL": 0.5341216118802655,
"eval_runtime": 29.6886,
"eval_samples_per_second": 31.157,
"eval_steps_per_second": 3.907,
"step": 5000
},
{
"epoch": 11.93058568329718,
"grad_norm": 1.0755261182785034,
"learning_rate": 2.13302752293578e-05,
"loss": 1.6713,
"step": 5500
},
{
"epoch": 11.93058568329718,
"eval_bleu": 0.3115672562854492,
"eval_loss": 1.920640230178833,
"eval_rouge1": 0.5765467952167939,
"eval_rouge2": 0.33826641143296676,
"eval_rougeL": 0.5387314433190069,
"eval_runtime": 29.7016,
"eval_samples_per_second": 31.143,
"eval_steps_per_second": 3.906,
"step": 5500
},
{
"epoch": 13.015184381778742,
"grad_norm": 1.0826488733291626,
"learning_rate": 1.8463302752293578e-05,
"loss": 1.6263,
"step": 6000
},
{
"epoch": 13.015184381778742,
"eval_bleu": 0.31268695772405475,
"eval_loss": 1.916778564453125,
"eval_rouge1": 0.5780842791223908,
"eval_rouge2": 0.34164409810850394,
"eval_rougeL": 0.5415509673961407,
"eval_runtime": 29.789,
"eval_samples_per_second": 31.052,
"eval_steps_per_second": 3.894,
"step": 6000
},
{
"epoch": 14.099783080260304,
"grad_norm": 1.0868735313415527,
"learning_rate": 1.559633027522936e-05,
"loss": 1.5869,
"step": 6500
},
{
"epoch": 14.099783080260304,
"eval_bleu": 0.31365743559233084,
"eval_loss": 1.9147837162017822,
"eval_rouge1": 0.5829184758698387,
"eval_rouge2": 0.3448101826360943,
"eval_rougeL": 0.5450794961513086,
"eval_runtime": 29.7645,
"eval_samples_per_second": 31.077,
"eval_steps_per_second": 3.897,
"step": 6500
},
{
"epoch": 15.184381778741866,
"grad_norm": 1.0827687978744507,
"learning_rate": 1.2729357798165138e-05,
"loss": 1.5544,
"step": 7000
},
{
"epoch": 15.184381778741866,
"eval_bleu": 0.315769500599606,
"eval_loss": 1.9121257066726685,
"eval_rouge1": 0.5844681250407762,
"eval_rouge2": 0.34764910748110744,
"eval_rougeL": 0.5476190296456669,
"eval_runtime": 29.7415,
"eval_samples_per_second": 31.101,
"eval_steps_per_second": 3.9,
"step": 7000
},
{
"epoch": 16.268980477223426,
"grad_norm": 1.1430450677871704,
"learning_rate": 9.862385321100918e-06,
"loss": 1.5307,
"step": 7500
},
{
"epoch": 16.268980477223426,
"eval_bleu": 0.31648880861794926,
"eval_loss": 1.9105726480484009,
"eval_rouge1": 0.5852713451659596,
"eval_rouge2": 0.34877835378762495,
"eval_rougeL": 0.5486197186684263,
"eval_runtime": 29.7345,
"eval_samples_per_second": 31.109,
"eval_steps_per_second": 3.901,
"step": 7500
},
{
"epoch": 17.35357917570499,
"grad_norm": 1.0865087509155273,
"learning_rate": 6.995412844036697e-06,
"loss": 1.5087,
"step": 8000
},
{
"epoch": 17.35357917570499,
"eval_bleu": 0.31692571547155524,
"eval_loss": 1.9093118906021118,
"eval_rouge1": 0.5860996975913157,
"eval_rouge2": 0.3503907384934047,
"eval_rougeL": 0.5500340150392318,
"eval_runtime": 29.7497,
"eval_samples_per_second": 31.093,
"eval_steps_per_second": 3.899,
"step": 8000
},
{
"epoch": 18.43817787418655,
"grad_norm": 1.1252211332321167,
"learning_rate": 4.128440366972477e-06,
"loss": 1.4937,
"step": 8500
},
{
"epoch": 18.43817787418655,
"eval_bleu": 0.31723468269919336,
"eval_loss": 1.9084105491638184,
"eval_rouge1": 0.5868586694605076,
"eval_rouge2": 0.350546625127078,
"eval_rougeL": 0.5503666110741787,
"eval_runtime": 29.7351,
"eval_samples_per_second": 31.108,
"eval_steps_per_second": 3.901,
"step": 8500
},
{
"epoch": 19.522776572668114,
"grad_norm": 1.150936245918274,
"learning_rate": 1.261467889908257e-06,
"loss": 1.4824,
"step": 9000
},
{
"epoch": 19.522776572668114,
"eval_bleu": 0.3177718226409019,
"eval_loss": 1.9086270332336426,
"eval_rouge1": 0.5875550437490973,
"eval_rouge2": 0.3512666976647323,
"eval_rougeL": 0.5509556223633276,
"eval_runtime": 30.1604,
"eval_samples_per_second": 30.669,
"eval_steps_per_second": 3.846,
"step": 9000
},
{
"epoch": 20.0,
"step": 9220,
"total_flos": 2.8862709792768e+16,
"train_loss": 1.4422371688478681,
"train_runtime": 3284.8472,
"train_samples_per_second": 22.412,
"train_steps_per_second": 2.807
}
],
"logging_steps": 500,
"max_steps": 9220,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8862709792768e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}