|
{
  "best_metric": 0.9066910147666931,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg_1.5/checkpoint-21315",
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 56840,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 1.173611044883728,
      "learning_rate": 4.766772598870057e-05,
      "loss": 1.2517,
      "step": 7105
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.27630483908384024,
      "eval_loss": 1.0203286409378052,
      "eval_rouge1": 0.45177772565881835,
      "eval_rouge2": 0.09405681309859196,
      "eval_rougeL": 0.4439172371201736,
      "eval_runtime": 399.4609,
      "eval_samples_per_second": 35.57,
      "eval_steps_per_second": 4.448,
      "step": 7105
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.3228689432144165,
      "learning_rate": 4.515889830508475e-05,
      "loss": 0.8548,
      "step": 14210
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.3013703850585307,
      "eval_loss": 0.9441202282905579,
      "eval_rouge1": 0.5003553784801738,
      "eval_rouge2": 0.1420292743132085,
      "eval_rougeL": 0.49367336720870203,
      "eval_runtime": 384.7586,
      "eval_samples_per_second": 36.93,
      "eval_steps_per_second": 4.618,
      "step": 14210
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.1944307088851929,
      "learning_rate": 4.265007062146893e-05,
      "loss": 0.7533,
      "step": 21315
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.310650314692537,
      "eval_loss": 0.9066910147666931,
      "eval_rouge1": 0.5230238843217684,
      "eval_rouge2": 0.16671937404400886,
      "eval_rougeL": 0.5166126640861954,
      "eval_runtime": 1037.5655,
      "eval_samples_per_second": 13.695,
      "eval_steps_per_second": 1.713,
      "step": 21315
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.6159381866455078,
      "learning_rate": 4.014124293785311e-05,
      "loss": 0.6781,
      "step": 28420
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.32048113085357516,
      "eval_loss": 0.909532904624939,
      "eval_rouge1": 0.5602679605982237,
      "eval_rouge2": 0.28777006827963697,
      "eval_rougeL": 0.5559392353971754,
      "eval_runtime": 377.7695,
      "eval_samples_per_second": 37.613,
      "eval_steps_per_second": 4.704,
      "step": 28420
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.188673973083496,
      "learning_rate": 3.763241525423729e-05,
      "loss": 0.6134,
      "step": 35525
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.3250047891084671,
      "eval_loss": 0.9152291417121887,
      "eval_rouge1": 0.5456119185913646,
      "eval_rouge2": 0.19532259739828473,
      "eval_rougeL": 0.5403228217764222,
      "eval_runtime": 377.7325,
      "eval_samples_per_second": 37.617,
      "eval_steps_per_second": 4.704,
      "step": 35525
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.2344160079956055,
      "learning_rate": 3.5123587570621466e-05,
      "loss": 0.5542,
      "step": 42630
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.3266066839569369,
      "eval_loss": 0.9293861985206604,
      "eval_rouge1": 0.5719694809249145,
      "eval_rouge2": 0.3028610424917365,
      "eval_rougeL": 0.5675532772123946,
      "eval_runtime": 380.2832,
      "eval_samples_per_second": 37.364,
      "eval_steps_per_second": 4.673,
      "step": 42630
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.5062270164489746,
      "learning_rate": 3.261475988700565e-05,
      "loss": 0.4993,
      "step": 49735
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.3252361071861535,
      "eval_loss": 0.9462244510650635,
      "eval_rouge1": 0.5502763386552358,
      "eval_rouge2": 0.20139303396164254,
      "eval_rougeL": 0.5447611551880709,
      "eval_runtime": 378.2074,
      "eval_samples_per_second": 37.569,
      "eval_steps_per_second": 4.698,
      "step": 49735
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.5913320779800415,
      "learning_rate": 3.010593220338983e-05,
      "loss": 0.4482,
      "step": 56840
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.32453596360754455,
      "eval_loss": 0.9838415384292603,
      "eval_rouge1": 0.5485197319407531,
      "eval_rouge2": 0.20067000334446508,
      "eval_rougeL": 0.542963955198015,
      "eval_runtime": 377.6879,
      "eval_samples_per_second": 37.621,
      "eval_steps_per_second": 4.705,
      "step": 56840
    },
    {
      "epoch": 8.0,
      "step": 56840,
      "total_flos": 5.152666773356544e+17,
      "train_loss": 0.7066081702751144,
      "train_runtime": 59748.056,
      "train_samples_per_second": 19.025,
      "train_steps_per_second": 2.378
    }
  ],
  "logging_steps": 500,
  "max_steps": 142100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.152666773356544e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|