{ "best_metric": 0.7315686941146851, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_eg/checkpoint-28428", "epoch": 9.0, "eval_steps": 500, "global_step": 63963, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.742945671081543, "learning_rate": 4.7667678621858235e-05, "loss": 1.1436, "step": 7107 }, { "epoch": 1.0, "eval_bleu": 0.1900415894207328, "eval_loss": 0.8277140259742737, "eval_rouge1": 0.5211536867388353, "eval_rouge2": 0.2576275131704426, "eval_rougeL": 0.5169189427573101, "eval_runtime": 204.5959, "eval_samples_per_second": 69.464, "eval_steps_per_second": 8.685, "step": 7107 }, { "epoch": 2.0, "grad_norm": 3.031801223754883, "learning_rate": 4.515885343123411e-05, "loss": 0.7508, "step": 14214 }, { "epoch": 2.0, "eval_bleu": 0.22138684401610842, "eval_loss": 0.7543078064918518, "eval_rouge1": 0.5674397247471176, "eval_rouge2": 0.3108337383535441, "eval_rougeL": 0.5636106781794015, "eval_runtime": 171.1246, "eval_samples_per_second": 83.051, "eval_steps_per_second": 10.384, "step": 14214 }, { "epoch": 3.0, "grad_norm": 1.8590487241744995, "learning_rate": 4.265002824061e-05, "loss": 0.6471, "step": 21321 }, { "epoch": 3.0, "eval_bleu": 0.2374960454489342, "eval_loss": 0.7337948083877563, "eval_rouge1": 0.5880985827608463, "eval_rouge2": 0.33558513842625187, "eval_rougeL": 0.5844518671510625, "eval_runtime": 68.7002, "eval_samples_per_second": 206.87, "eval_steps_per_second": 25.866, "step": 21321 }, { "epoch": 4.0, "grad_norm": 1.3944027423858643, "learning_rate": 4.0141203049985884e-05, "loss": 0.5713, "step": 28428 }, { "epoch": 4.0, "eval_bleu": 0.24587227576979195, "eval_loss": 0.7315686941146851, "eval_rouge1": 0.6017197427075045, "eval_rouge2": 0.3518746485163118, "eval_rougeL": 0.5982542515796094, "eval_runtime": 62.0721, "eval_samples_per_second": 228.959, "eval_steps_per_second": 28.628, "step": 28428 }, { "epoch": 5.0, "grad_norm": 2.199220657348633, "learning_rate": 3.763237785936176e-05, "loss": 0.5097, "step": 35535 }, { "epoch": 5.0, "eval_bleu": 0.24748155317226092, "eval_loss": 0.7390380501747131, "eval_rouge1": 0.6058102682046419, "eval_rouge2": 0.357170685615976, "eval_rougeL": 0.6021635755679425, "eval_runtime": 67.8747, "eval_samples_per_second": 209.386, "eval_steps_per_second": 26.181, "step": 35535 }, { "epoch": 6.0, "grad_norm": 2.055725574493408, "learning_rate": 3.512355266873765e-05, "loss": 0.4573, "step": 42642 }, { "epoch": 6.0, "eval_bleu": 0.25030630377831276, "eval_loss": 0.748293399810791, "eval_rouge1": 0.6103116816448397, "eval_rouge2": 0.361846050958361, "eval_rougeL": 0.6066395364597333, "eval_runtime": 56.4418, "eval_samples_per_second": 251.799, "eval_steps_per_second": 31.484, "step": 42642 }, { "epoch": 7.0, "grad_norm": 1.6595733165740967, "learning_rate": 3.2614727478113526e-05, "loss": 0.4118, "step": 49749 }, { "epoch": 7.0, "eval_bleu": 0.2494244558337241, "eval_loss": 0.7635838389396667, "eval_rouge1": 0.610621109140437, "eval_rouge2": 0.3633959713058441, "eval_rougeL": 0.6069537363647842, "eval_runtime": 173.9311, "eval_samples_per_second": 81.711, "eval_steps_per_second": 10.217, "step": 49749 }, { "epoch": 8.0, "grad_norm": 3.863671064376831, "learning_rate": 3.010590228748941e-05, "loss": 0.3725, "step": 56856 }, { "epoch": 8.0, "eval_bleu": 0.25065847486647275, "eval_loss": 0.7796261310577393, "eval_rouge1": 0.6126587801190159, "eval_rouge2": 0.3659624175392553, "eval_rougeL": 0.6088959046619336, "eval_runtime": 170.86, "eval_samples_per_second": 83.179, "eval_steps_per_second": 10.4, "step": 56856 }, { "epoch": 9.0, "grad_norm": 3.546931266784668, "learning_rate": 2.7597077096865293e-05, "loss": 0.3375, "step": 63963 }, { "epoch": 9.0, "eval_bleu": 0.24908190761452426, "eval_loss": 0.7973926663398743, "eval_rouge1": 0.6111967178899901, "eval_rouge2": 0.36536691181853787, "eval_rougeL": 0.6074289902749841, "eval_runtime": 173.0755, "eval_samples_per_second": 82.114, "eval_steps_per_second": 10.267, "step": 63963 }, { "epoch": 9.0, "step": 63963, "total_flos": 3.3423104950272e+16, "train_loss": 0.5779510789562912, "train_runtime": 8660.6869, "train_samples_per_second": 131.285, "train_steps_per_second": 16.412 } ], "logging_steps": 500, "max_steps": 142140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.3423104950272e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }