{ "best_metric": 6.446480382145557, "best_model_checkpoint": "/opt/dlami/nvme/AAA_V2/checkpoint-1584", "epoch": 11.0, "eval_steps": 500, "global_step": 2178, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.28139322996139526, "learning_rate": 0.0007714285714285713, "loss": 0.0358, "step": 198 }, { "epoch": 1.0, "eval_bleu": 6.260606562914056, "eval_loss": 0.0037530818954110146, "eval_runtime": 73.4886, "eval_samples_per_second": 10.736, "eval_steps_per_second": 0.68, "step": 198 }, { "epoch": 2.0, "grad_norm": 0.16617226600646973, "learning_rate": 0.0006428571428571428, "loss": 0.0052, "step": 396 }, { "epoch": 2.0, "eval_bleu": 6.308825370321139, "eval_loss": 0.001781024388037622, "eval_runtime": 73.278, "eval_samples_per_second": 10.767, "eval_steps_per_second": 0.682, "step": 396 }, { "epoch": 3.0, "grad_norm": 0.1758955419063568, "learning_rate": 0.0005142857142857142, "loss": 0.0025, "step": 594 }, { "epoch": 3.0, "eval_bleu": 6.368129059471027, "eval_loss": 0.0011983831645920873, "eval_runtime": 73.2664, "eval_samples_per_second": 10.769, "eval_steps_per_second": 0.682, "step": 594 }, { "epoch": 4.0, "grad_norm": 0.0046499622985720634, "learning_rate": 0.00038571428571428567, "loss": 0.0015, "step": 792 }, { "epoch": 4.0, "eval_bleu": 6.385440196858329, "eval_loss": 0.0010980970691889524, "eval_runtime": 73.1472, "eval_samples_per_second": 10.786, "eval_steps_per_second": 0.684, "step": 792 }, { "epoch": 5.0, "grad_norm": 0.0024423596914857626, "learning_rate": 0.0002571428571428571, "loss": 0.0008, "step": 990 }, { "epoch": 5.0, "eval_bleu": 6.41579040812938, "eval_loss": 0.0010172681650146842, "eval_runtime": 73.3825, "eval_samples_per_second": 10.752, "eval_steps_per_second": 0.681, "step": 990 }, { "epoch": 6.0, "grad_norm": 0.00022464522044174373, "learning_rate": 0.00012857142857142855, "loss": 0.0006, "step": 1188 }, { "epoch": 6.0, "eval_bleu": 6.392992917575001, "eval_loss": 0.0009776438819244504, "eval_runtime": 73.1926, "eval_samples_per_second": 10.78, "eval_steps_per_second": 0.683, "step": 1188 }, { "epoch": 7.0, "grad_norm": 0.0007703190785832703, "learning_rate": 0.0, "loss": 0.0004, "step": 1386 }, { "epoch": 7.0, "eval_bleu": 6.4192986039574595, "eval_loss": 0.0009626392857171595, "eval_runtime": 73.2421, "eval_samples_per_second": 10.772, "eval_steps_per_second": 0.683, "step": 1386 }, { "epoch": 8.0, "grad_norm": 4.616127989720553e-05, "learning_rate": 0.0003, "loss": 0.0006, "step": 1584 }, { "epoch": 8.0, "eval_bleu": 6.446480382145557, "eval_loss": 0.0012350629549473524, "eval_runtime": 73.8593, "eval_samples_per_second": 10.682, "eval_steps_per_second": 0.677, "step": 1584 }, { "epoch": 9.0, "grad_norm": 5.939168113400228e-05, "learning_rate": 0.000225, "loss": 0.0008, "step": 1782 }, { "epoch": 9.0, "eval_bleu": 6.435039176248687, "eval_loss": 0.0010175537317991257, "eval_runtime": 73.5317, "eval_samples_per_second": 10.73, "eval_steps_per_second": 0.68, "step": 1782 }, { "epoch": 10.0, "grad_norm": 2.800251604639925e-05, "learning_rate": 0.00015, "loss": 0.0006, "step": 1980 }, { "epoch": 10.0, "eval_bleu": 6.399756503138505, "eval_loss": 0.0010247103637084365, "eval_runtime": 73.5802, "eval_samples_per_second": 10.723, "eval_steps_per_second": 0.68, "step": 1980 }, { "epoch": 11.0, "grad_norm": 0.0001712931552901864, "learning_rate": 7.5e-05, "loss": 0.0004, "step": 2178 }, { "epoch": 11.0, "eval_bleu": 6.409543063646119, "eval_loss": 0.0009960704483091831, "eval_runtime": 73.5452, "eval_samples_per_second": 10.728, "eval_steps_per_second": 0.68, "step": 2178 } ], "logging_steps": 500, "max_steps": 2376, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.112048651829248e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }