{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 2352, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 13.50642145973043, "eval_loss": 1.9098774194717407, "eval_runtime": 18.3505, "eval_samples_per_second": 1.199, "eval_steps_per_second": 1.199, "step": 196 }, { "epoch": 2.0, "eval_bleu": 15.388992406642911, "eval_loss": 1.5389823913574219, "eval_runtime": 17.9949, "eval_samples_per_second": 1.223, "eval_steps_per_second": 1.223, "step": 392 }, { "epoch": 2.5510204081632653, "grad_norm": 2.0644779205322266, "learning_rate": 7.891156462585034e-05, "loss": 2.4223, "step": 500 }, { "epoch": 3.0, "eval_bleu": 16.361746081538808, "eval_loss": 1.4969135522842407, "eval_runtime": 18.102, "eval_samples_per_second": 1.215, "eval_steps_per_second": 1.215, "step": 588 }, { "epoch": 4.0, "eval_bleu": 16.192662011002973, "eval_loss": 1.558351993560791, "eval_runtime": 18.0485, "eval_samples_per_second": 1.219, "eval_steps_per_second": 1.219, "step": 784 }, { "epoch": 5.0, "eval_bleu": 20.388609774897613, "eval_loss": 1.6301382780075073, "eval_runtime": 17.8816, "eval_samples_per_second": 1.23, "eval_steps_per_second": 1.23, "step": 980 }, { "epoch": 5.1020408163265305, "grad_norm": 3.387509822845459, "learning_rate": 5.7653061224489805e-05, "loss": 0.5615, "step": 1000 }, { "epoch": 6.0, "eval_bleu": 20.820547846052477, "eval_loss": 1.6597435474395752, "eval_runtime": 18.4966, "eval_samples_per_second": 1.189, "eval_steps_per_second": 1.189, "step": 1176 }, { "epoch": 7.0, "eval_bleu": 20.333903309139888, "eval_loss": 1.710249662399292, "eval_runtime": 17.8589, "eval_samples_per_second": 1.232, "eval_steps_per_second": 1.232, "step": 1372 }, { "epoch": 7.653061224489796, "grad_norm": 2.134899377822876, "learning_rate": 3.639455782312925e-05, "loss": 0.2154, "step": 1500 }, { "epoch": 8.0, "eval_bleu": 19.92413260385578, "eval_loss": 1.7504385709762573, "eval_runtime": 19.3501, "eval_samples_per_second": 1.137, "eval_steps_per_second": 1.137, "step": 1568 }, { "epoch": 9.0, "eval_bleu": 21.495754342436005, "eval_loss": 1.7941405773162842, "eval_runtime": 18.5215, "eval_samples_per_second": 1.188, "eval_steps_per_second": 1.188, "step": 1764 }, { "epoch": 10.0, "eval_bleu": 22.143240405081055, "eval_loss": 1.8230061531066895, "eval_runtime": 19.4583, "eval_samples_per_second": 1.131, "eval_steps_per_second": 1.131, "step": 1960 }, { "epoch": 10.204081632653061, "grad_norm": 0.1605977714061737, "learning_rate": 1.5136054421768709e-05, "loss": 0.1043, "step": 2000 }, { "epoch": 11.0, "eval_bleu": 21.334082933744533, "eval_loss": 1.8319286108016968, "eval_runtime": 18.1893, "eval_samples_per_second": 1.21, "eval_steps_per_second": 1.21, "step": 2156 } ], "logging_steps": 500, "max_steps": 2352, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 637128753020928.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }