{ "best_metric": 0.7602559328079224, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg_03/checkpoint-14210", "epoch": 7.0, "eval_steps": 500, "global_step": 49735, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.4059462547302246, "learning_rate": 4.766772598870057e-05, "loss": 0.874, "step": 7105 }, { "epoch": 1.0, "eval_bleu": 0.34333508777542915, "eval_loss": 0.7882477641105652, "eval_rouge1": 0.6159142183825631, "eval_rouge2": 0.3508423646141942, "eval_rougeL": 0.6121110717073233, "eval_runtime": 132.93, "eval_samples_per_second": 106.891, "eval_steps_per_second": 13.368, "step": 7105 }, { "epoch": 2.0, "grad_norm": 3.7706522941589355, "learning_rate": 4.515889830508475e-05, "loss": 0.5689, "step": 14210 }, { "epoch": 2.0, "eval_bleu": 0.3631689720024005, "eval_loss": 0.7602559328079224, "eval_rouge1": 0.6402930839888462, "eval_rouge2": 0.3828942418456889, "eval_rougeL": 0.6367647874361373, "eval_runtime": 219.0505, "eval_samples_per_second": 64.866, "eval_steps_per_second": 8.112, "step": 14210 }, { "epoch": 3.0, "grad_norm": 2.4260447025299072, "learning_rate": 4.265007062146893e-05, "loss": 0.4369, "step": 21315 }, { "epoch": 3.0, "eval_bleu": 0.36610303407947103, "eval_loss": 0.7770611643791199, "eval_rouge1": 0.6205180467810911, "eval_rouge2": 0.28726786502140805, "eval_rougeL": 0.6174286813730623, "eval_runtime": 229.3738, "eval_samples_per_second": 61.947, "eval_steps_per_second": 7.747, "step": 21315 }, { "epoch": 4.0, "grad_norm": 4.454061031341553, "learning_rate": 4.014124293785311e-05, "loss": 0.3314, "step": 28420 }, { "epoch": 4.0, "eval_bleu": 0.365696341752005, "eval_loss": 0.8175964951515198, "eval_rouge1": 0.6468122957416143, "eval_rouge2": 0.3918607021930349, "eval_rougeL": 0.6436111807449674, "eval_runtime": 229.5565, "eval_samples_per_second": 61.898, "eval_steps_per_second": 7.741, "step": 28420 }, { "epoch": 5.0, "grad_norm": 2.0373406410217285, "learning_rate": 3.763241525423729e-05, "loss": 0.2518, "step": 35525 }, { "epoch": 5.0, "eval_bleu": 0.3619747558826484, "eval_loss": 0.8583759069442749, "eval_rouge1": 0.6465870583022197, "eval_rouge2": 0.3915693838544316, "eval_rougeL": 0.6434157590310816, "eval_runtime": 139.7631, "eval_samples_per_second": 101.665, "eval_steps_per_second": 12.714, "step": 35525 }, { "epoch": 6.0, "grad_norm": 2.6252825260162354, "learning_rate": 3.5123587570621466e-05, "loss": 0.1991, "step": 42630 }, { "epoch": 6.0, "eval_bleu": 0.3615834909766938, "eval_loss": 0.9106101989746094, "eval_rouge1": 0.6428131756085969, "eval_rouge2": 0.38795383008457324, "eval_rougeL": 0.6396112320774026, "eval_runtime": 228.74, "eval_samples_per_second": 62.119, "eval_steps_per_second": 7.769, "step": 42630 }, { "epoch": 7.0, "grad_norm": 1.9299352169036865, "learning_rate": 3.261475988700565e-05, "loss": 0.1678, "step": 49735 }, { "epoch": 7.0, "eval_bleu": 0.3620025104118138, "eval_loss": 0.9511250257492065, "eval_rouge1": 0.6421476450933186, "eval_rouge2": 0.38754079327242963, "eval_rougeL": 0.6388356718753687, "eval_runtime": 228.7592, "eval_samples_per_second": 62.113, "eval_steps_per_second": 7.768, "step": 49735 }, { "epoch": 7.0, "step": 49735, "total_flos": 9.237135717983846e+16, "train_loss": 0.40426278318054437, "train_runtime": 12413.183, "train_samples_per_second": 91.574, "train_steps_per_second": 11.448 } ], "logging_steps": 500, "max_steps": 142100, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.237135717983846e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }