{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999729483584162, "global_step": 13862, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 2.8839221341023793e-05, "loss": 1.4339, "step": 500 }, { "epoch": 0.07, "learning_rate": 5.767844268204759e-05, "loss": 1.4587, "step": 1000 }, { "epoch": 0.11, "learning_rate": 7.927535070140282e-05, "loss": 1.5324, "step": 1500 }, { "epoch": 0.14, "learning_rate": 7.606893787575151e-05, "loss": 1.5695, "step": 2000 }, { "epoch": 0.18, "learning_rate": 7.286252505010021e-05, "loss": 1.5931, "step": 2500 }, { "epoch": 0.22, "learning_rate": 6.96561122244489e-05, "loss": 1.5994, "step": 3000 }, { "epoch": 0.25, "learning_rate": 6.64496993987976e-05, "loss": 1.6028, "step": 3500 }, { "epoch": 0.29, "learning_rate": 6.32432865731463e-05, "loss": 1.5965, "step": 4000 }, { "epoch": 0.32, "learning_rate": 6.0036873747494996e-05, "loss": 1.6002, "step": 4500 }, { "epoch": 0.36, "learning_rate": 5.683046092184369e-05, "loss": 1.6017, "step": 5000 }, { "epoch": 0.4, "learning_rate": 5.362404809619239e-05, "loss": 1.5735, "step": 5500 }, { "epoch": 0.43, "learning_rate": 5.041763527054109e-05, "loss": 1.5765, "step": 6000 }, { "epoch": 0.47, "learning_rate": 4.7211222444889784e-05, "loss": 1.5713, "step": 6500 }, { "epoch": 0.5, "learning_rate": 4.400480961923849e-05, "loss": 1.5619, "step": 7000 }, { "epoch": 0.54, "learning_rate": 4.0798396793587175e-05, "loss": 1.5509, "step": 7500 }, { "epoch": 0.58, "learning_rate": 3.759198396793588e-05, "loss": 1.5421, "step": 8000 }, { "epoch": 0.61, "learning_rate": 3.438557114228457e-05, "loss": 1.5299, "step": 8500 }, { "epoch": 0.65, "learning_rate": 3.117915831663327e-05, "loss": 1.5285, "step": 9000 }, { "epoch": 0.69, "learning_rate": 2.7972745490981967e-05, "loss": 1.5326, "step": 9500 }, { "epoch": 0.72, "learning_rate": 2.4766332665330663e-05, "loss": 1.5119, "step": 10000 }, { "epoch": 0.76, "learning_rate": 2.1559919839679358e-05, "loss": 1.5147, "step": 10500 }, { "epoch": 0.79, "learning_rate": 1.8353507014028057e-05, "loss": 1.4978, "step": 11000 }, { "epoch": 0.83, "learning_rate": 1.5147094188376754e-05, "loss": 1.4914, "step": 11500 }, { "epoch": 0.87, "learning_rate": 1.1940681362725453e-05, "loss": 1.4889, "step": 12000 }, { "epoch": 0.9, "learning_rate": 8.734268537074148e-06, "loss": 1.4894, "step": 12500 }, { "epoch": 0.94, "learning_rate": 5.527855711422846e-06, "loss": 1.4743, "step": 13000 }, { "epoch": 0.97, "learning_rate": 2.321442885771543e-06, "loss": 1.4627, "step": 13500 }, { "epoch": 1.0, "step": 13862, "total_flos": 7.817060132659814e+17, "train_loss": 1.5347596183515975, "train_runtime": 69651.7637, "train_samples_per_second": 6.369, "train_steps_per_second": 0.199 } ], "max_steps": 13862, "num_train_epochs": 1, "total_flos": 7.817060132659814e+17, "trial_name": null, "trial_params": null }