{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.121212121212121, "eval_steps": 500, "global_step": 15, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.00029994859874633357, "loss": 3.1318, "step": 1 }, { "epoch": 0.12, "eval_loss": 2.8352584838867188, "eval_runtime": 2.5475, "eval_samples_per_second": 91.853, "eval_steps_per_second": 3.14, "step": 1 }, { "epoch": 1.12, "eval_loss": 2.636443853378296, "eval_runtime": 1.541, "eval_samples_per_second": 151.85, "eval_steps_per_second": 5.191, "step": 2 }, { "epoch": 2.12, "eval_loss": 2.4945569038391113, "eval_runtime": 1.5187, "eval_samples_per_second": 154.083, "eval_steps_per_second": 5.268, "step": 3 }, { "epoch": 3.12, "eval_loss": 2.5336802005767822, "eval_runtime": 1.5117, "eval_samples_per_second": 154.794, "eval_steps_per_second": 5.292, "step": 4 }, { "epoch": 4.12, "learning_rate": 0.00029871672920607153, "loss": 2.7387, "step": 5 }, { "epoch": 4.12, "eval_loss": 2.335446357727051, "eval_runtime": 1.5223, "eval_samples_per_second": 153.715, "eval_steps_per_second": 5.255, "step": 5 }, { "epoch": 5.12, "eval_loss": 2.2142887115478516, "eval_runtime": 1.5277, "eval_samples_per_second": 153.174, "eval_steps_per_second": 5.237, "step": 6 }, { "epoch": 6.12, "eval_loss": 2.1648504734039307, "eval_runtime": 1.5302, "eval_samples_per_second": 152.92, "eval_steps_per_second": 5.228, "step": 7 }, { "epoch": 7.12, "eval_loss": 2.1061620712280273, "eval_runtime": 1.521, "eval_samples_per_second": 153.849, "eval_steps_per_second": 5.26, "step": 8 }, { "epoch": 8.12, "eval_loss": 2.0853805541992188, "eval_runtime": 1.5247, "eval_samples_per_second": 153.473, "eval_steps_per_second": 5.247, "step": 9 }, { "epoch": 9.12, "learning_rate": 0.0002948888739433602, "loss": 2.2698, "step": 10 }, { "epoch": 9.12, "eval_loss": 2.04968523979187, "eval_runtime": 1.529, "eval_samples_per_second": 153.041, "eval_steps_per_second": 5.232, "step": 10 }, { "epoch": 10.12, "eval_loss": 1.9576212167739868, "eval_runtime": 1.5153, "eval_samples_per_second": 154.423, "eval_steps_per_second": 5.279, "step": 11 }, { "epoch": 11.12, "eval_loss": 1.8584753274917603, "eval_runtime": 1.5118, "eval_samples_per_second": 154.787, "eval_steps_per_second": 5.292, "step": 12 }, { "epoch": 12.12, "eval_loss": 1.7771046161651611, "eval_runtime": 1.5236, "eval_samples_per_second": 153.588, "eval_steps_per_second": 5.251, "step": 13 }, { "epoch": 13.12, "eval_loss": 1.795164942741394, "eval_runtime": 1.5138, "eval_samples_per_second": 154.581, "eval_steps_per_second": 5.285, "step": 14 }, { "epoch": 14.12, "learning_rate": 0.000288581929876693, "loss": 1.9483, "step": 15 }, { "epoch": 14.12, "eval_loss": 1.7796375751495361, "eval_runtime": 1.5232, "eval_samples_per_second": 153.622, "eval_steps_per_second": 5.252, "step": 15 }, { "epoch": 14.12, "step": 15, "total_flos": 1357642651926528.0, "train_loss": 2.34513889948527, "train_runtime": 717.7443, "train_samples_per_second": 43.825, "train_steps_per_second": 0.167 } ], "logging_steps": 5, "max_steps": 120, "num_train_epochs": 15, "save_steps": 500, "total_flos": 1357642651926528.0, "trial_name": null, "trial_params": null }