{ "best_metric": 8.105906632790287, "best_model_checkpoint": "/opt/dlami/nvme/AAA/checkpoint-1986", "epoch": 6.0, "eval_steps": 500, "global_step": 1986, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.04436992481350899, "learning_rate": 0.0006666666666666668, "loss": 0.6502, "step": 331 }, { "epoch": 1.0, "eval_bleu": 4.361617844387214, "eval_loss": 0.007830865681171417, "eval_runtime": 121.9513, "eval_samples_per_second": 10.849, "eval_steps_per_second": 0.681, "step": 331 }, { "epoch": 2.0, "grad_norm": 0.038081083446741104, "learning_rate": 0.0005333333333333334, "loss": 0.0071, "step": 662 }, { "epoch": 2.0, "eval_bleu": 7.843100767141589, "eval_loss": 0.0018343101255595684, "eval_runtime": 122.0918, "eval_samples_per_second": 10.836, "eval_steps_per_second": 0.68, "step": 662 }, { "epoch": 3.0, "grad_norm": 0.019110124558210373, "learning_rate": 0.00045714285714285713, "loss": 0.0027, "step": 993 }, { "epoch": 3.0, "eval_bleu": 8.040733224615437, "eval_loss": 0.0016475560842081904, "eval_runtime": 122.063, "eval_samples_per_second": 10.839, "eval_steps_per_second": 0.68, "step": 993 }, { "epoch": 4.0, "grad_norm": 0.006580352783203125, "learning_rate": 0.00034285714285714285, "loss": 0.0016, "step": 1324 }, { "epoch": 4.0, "eval_bleu": 8.046469246678804, "eval_loss": 0.0006783127901144326, "eval_runtime": 122.0945, "eval_samples_per_second": 10.836, "eval_steps_per_second": 0.68, "step": 1324 }, { "epoch": 5.0, "grad_norm": 0.0007480424828827381, "learning_rate": 0.00022857142857142857, "loss": 0.001, "step": 1655 }, { "epoch": 5.0, "eval_bleu": 8.065670810538343, "eval_loss": 0.0004619210085365921, "eval_runtime": 122.0219, "eval_samples_per_second": 10.842, "eval_steps_per_second": 0.68, "step": 1655 }, { "epoch": 6.0, "grad_norm": 0.00835416093468666, "learning_rate": 0.00011428571428571428, "loss": 0.0007, "step": 1986 }, { "epoch": 6.0, "eval_bleu": 8.105906632790287, "eval_loss": 0.00035273091634735465, "eval_runtime": 121.9325, "eval_samples_per_second": 10.85, "eval_steps_per_second": 0.681, "step": 1986 } ], "logging_steps": 500, "max_steps": 2317, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.933197719371776e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }