{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.901639344262295, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0, "loss": 1.8309, "step": 1 }, { "epoch": 0.52, "learning_rate": 2e-05, "loss": 1.5321, "step": 8 }, { "epoch": 0.98, "eval_loss": 1.2855817079544067, "eval_runtime": 6.6367, "eval_samples_per_second": 30.437, "eval_steps_per_second": 1.055, "step": 15 }, { "epoch": 1.05, "learning_rate": 2e-05, "loss": 1.35, "step": 16 }, { "epoch": 1.57, "learning_rate": 2e-05, "loss": 1.2071, "step": 24 }, { "epoch": 1.97, "eval_loss": 1.2619894742965698, "eval_runtime": 5.1554, "eval_samples_per_second": 39.183, "eval_steps_per_second": 1.358, "step": 30 }, { "epoch": 2.1, "learning_rate": 2e-05, "loss": 1.1502, "step": 32 }, { "epoch": 2.62, "learning_rate": 2e-05, "loss": 1.0162, "step": 40 }, { "epoch": 2.95, "eval_loss": 1.285272240638733, "eval_runtime": 5.1992, "eval_samples_per_second": 38.852, "eval_steps_per_second": 1.346, "step": 45 }, { "epoch": 3.15, "learning_rate": 2e-05, "loss": 0.9511, "step": 48 }, { "epoch": 3.67, "learning_rate": 2e-05, "loss": 0.8484, "step": 56 }, { "epoch": 4.0, "eval_loss": 1.3274288177490234, "eval_runtime": 5.1899, "eval_samples_per_second": 38.922, "eval_steps_per_second": 1.349, "step": 61 }, { "epoch": 4.2, "learning_rate": 2e-05, "loss": 0.7971, "step": 64 }, { "epoch": 4.72, "learning_rate": 2e-05, "loss": 0.6981, "step": 72 }, { "epoch": 4.98, "eval_loss": 1.3993656635284424, "eval_runtime": 5.213, "eval_samples_per_second": 38.749, "eval_steps_per_second": 1.343, "step": 76 }, { "epoch": 5.25, "learning_rate": 2e-05, "loss": 0.6462, "step": 80 }, { "epoch": 5.77, "learning_rate": 2e-05, "loss": 0.5668, "step": 88 }, { "epoch": 5.9, "eval_loss": 1.4719988107681274, "eval_runtime": 5.1996, "eval_samples_per_second": 38.849, "eval_steps_per_second": 1.346, "step": 90 }, { "epoch": 5.9, "step": 90, "total_flos": 383994839433216.0, "train_loss": 0.9728257921006944, "train_runtime": 2307.7787, "train_samples_per_second": 10.108, "train_steps_per_second": 0.039 } ], "max_steps": 90, "num_train_epochs": 6, "total_flos": 383994839433216.0, "trial_name": null, "trial_params": null }