{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9648784253184098, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9742699086581757e-05, "loss": 2.8717, "step": 500 }, { "epoch": 0.08, "learning_rate": 1.9485398173163515e-05, "loss": 2.2456, "step": 1000 }, { "epoch": 0.12, "learning_rate": 1.9228097259745274e-05, "loss": 2.0513, "step": 1500 }, { "epoch": 0.15, "learning_rate": 1.8970796346327032e-05, "loss": 1.9335, "step": 2000 }, { "epoch": 0.19, "learning_rate": 1.8713495432908788e-05, "loss": 1.8139, "step": 2500 }, { "epoch": 0.23, "learning_rate": 1.8456194519490546e-05, "loss": 1.7416, "step": 3000 }, { "epoch": 0.27, "learning_rate": 1.8198893606072305e-05, "loss": 1.6725, "step": 3500 }, { "epoch": 0.31, "learning_rate": 1.794159269265406e-05, "loss": 1.6281, "step": 4000 }, { "epoch": 0.35, "learning_rate": 1.768429177923582e-05, "loss": 1.5546, "step": 4500 }, { "epoch": 0.39, "learning_rate": 1.7426990865817574e-05, "loss": 1.5645, "step": 5000 }, { "epoch": 0.42, "learning_rate": 1.7169689952399332e-05, "loss": 1.5436, "step": 5500 }, { "epoch": 0.46, "learning_rate": 1.691238903898109e-05, "loss": 1.504, "step": 6000 }, { "epoch": 0.5, "learning_rate": 1.6655088125562846e-05, "loss": 1.4839, "step": 6500 }, { "epoch": 0.54, "learning_rate": 1.6397787212144604e-05, "loss": 1.4755, "step": 7000 }, { "epoch": 0.58, "learning_rate": 1.614048629872636e-05, "loss": 1.4362, "step": 7500 }, { "epoch": 0.62, "learning_rate": 1.5883185385308118e-05, "loss": 1.4063, "step": 8000 }, { "epoch": 0.66, "learning_rate": 1.5625884471889877e-05, "loss": 1.4216, "step": 8500 }, { "epoch": 0.69, "learning_rate": 1.5368583558471635e-05, "loss": 1.3668, "step": 9000 }, { "epoch": 0.73, "learning_rate": 1.5111282645053392e-05, "loss": 1.3787, "step": 9500 }, { "epoch": 0.77, "learning_rate": 1.4853981731635147e-05, "loss": 1.3967, "step": 10000 }, { "epoch": 0.81, "learning_rate": 1.4596680818216906e-05, "loss": 1.3867, "step": 10500 }, { "epoch": 0.85, "learning_rate": 1.4339379904798663e-05, "loss": 1.3715, "step": 11000 }, { "epoch": 0.89, "learning_rate": 1.4082078991380421e-05, "loss": 1.3416, "step": 11500 }, { "epoch": 0.93, "learning_rate": 1.3824778077962178e-05, "loss": 1.3113, "step": 12000 }, { "epoch": 0.96, "learning_rate": 1.3567477164543936e-05, "loss": 1.323, "step": 12500 } ], "max_steps": 38865, "num_train_epochs": 3, "total_flos": 1.95972567552e+16, "trial_name": null, "trial_params": null }