{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 17910, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 1.944165270798437e-05, "loss": 3.7464, "step": 500 }, { "epoch": 0.11, "learning_rate": 1.8883305415968732e-05, "loss": 3.0711, "step": 1000 }, { "epoch": 0.17, "learning_rate": 1.83249581239531e-05, "loss": 2.8204, "step": 1500 }, { "epoch": 0.22, "learning_rate": 1.7766610831937466e-05, "loss": 2.6566, "step": 2000 }, { "epoch": 0.28, "learning_rate": 1.720826353992183e-05, "loss": 2.5476, "step": 2500 }, { "epoch": 0.34, "learning_rate": 1.66499162479062e-05, "loss": 2.4642, "step": 3000 }, { "epoch": 0.39, "learning_rate": 1.6091568955890565e-05, "loss": 2.3983, "step": 3500 }, { "epoch": 0.45, "learning_rate": 1.553322166387493e-05, "loss": 2.3347, "step": 4000 }, { "epoch": 0.5, "learning_rate": 1.4974874371859299e-05, "loss": 2.2982, "step": 4500 }, { "epoch": 0.56, "learning_rate": 1.4416527079843662e-05, "loss": 2.2627, "step": 5000 }, { "epoch": 0.61, "learning_rate": 1.385817978782803e-05, "loss": 2.2366, "step": 5500 }, { "epoch": 0.67, "learning_rate": 1.3299832495812398e-05, "loss": 2.1983, "step": 6000 }, { "epoch": 0.73, "learning_rate": 1.2741485203796761e-05, "loss": 2.1904, "step": 6500 }, { "epoch": 0.78, "learning_rate": 1.2183137911781128e-05, "loss": 2.1333, "step": 7000 }, { "epoch": 0.84, "learning_rate": 1.1624790619765495e-05, "loss": 2.1207, "step": 7500 }, { "epoch": 0.89, "learning_rate": 1.106644332774986e-05, "loss": 2.1035, "step": 8000 }, { "epoch": 0.95, "learning_rate": 1.0508096035734227e-05, "loss": 2.0814, "step": 8500 }, { "epoch": 1.0, "eval_loss": 1.9788250923156738, "eval_runtime": 99.7037, "eval_samples_per_second": 302.537, "eval_steps_per_second": 4.734, "step": 8955 }, { "epoch": 1.01, "learning_rate": 9.949748743718594e-06, "loss": 2.0605, "step": 9000 }, { "epoch": 1.06, "learning_rate": 9.39140145170296e-06, "loss": 2.0316, "step": 9500 }, { "epoch": 1.12, "learning_rate": 8.833054159687326e-06, "loss": 2.0237, "step": 10000 }, { "epoch": 1.17, "learning_rate": 8.274706867671693e-06, "loss": 2.0168, "step": 10500 }, { "epoch": 1.23, "learning_rate": 7.716359575656058e-06, "loss": 1.9817, "step": 11000 }, { "epoch": 1.28, "learning_rate": 7.158012283640425e-06, "loss": 1.9987, "step": 11500 }, { "epoch": 1.34, "learning_rate": 6.599664991624791e-06, "loss": 1.9769, "step": 12000 }, { "epoch": 1.4, "learning_rate": 6.041317699609157e-06, "loss": 1.9657, "step": 12500 }, { "epoch": 1.45, "learning_rate": 5.482970407593524e-06, "loss": 1.9676, "step": 13000 }, { "epoch": 1.51, "learning_rate": 4.92462311557789e-06, "loss": 1.9505, "step": 13500 }, { "epoch": 1.56, "learning_rate": 4.366275823562256e-06, "loss": 1.9629, "step": 14000 }, { "epoch": 1.62, "learning_rate": 3.8079285315466224e-06, "loss": 1.9709, "step": 14500 }, { "epoch": 1.68, "learning_rate": 3.2495812395309884e-06, "loss": 1.9377, "step": 15000 }, { "epoch": 1.73, "learning_rate": 2.691233947515355e-06, "loss": 1.9388, "step": 15500 }, { "epoch": 1.79, "learning_rate": 2.132886655499721e-06, "loss": 1.9248, "step": 16000 }, { "epoch": 1.84, "learning_rate": 1.5745393634840873e-06, "loss": 1.9169, "step": 16500 }, { "epoch": 1.9, "learning_rate": 1.0161920714684535e-06, "loss": 1.9248, "step": 17000 }, { "epoch": 1.95, "learning_rate": 4.5784477945281974e-07, "loss": 1.9237, "step": 17500 }, { "epoch": 2.0, "eval_loss": 1.8554816246032715, "eval_runtime": 89.1269, "eval_samples_per_second": 338.439, "eval_steps_per_second": 5.296, "step": 17910 }, { "epoch": 2.0, "step": 17910, "total_flos": 3.772502476406784e+16, "train_loss": 2.197303864837425, "train_runtime": 7444.9411, "train_samples_per_second": 153.96, "train_steps_per_second": 2.406 } ], "max_steps": 17910, "num_train_epochs": 2, "total_flos": 3.772502476406784e+16, "trial_name": null, "trial_params": null }