{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.5e-05, "loss": 2.4811, "step": 1 }, { "epoch": 0.06, "learning_rate": 5e-05, "loss": 2.7677, "step": 2 }, { "epoch": 0.1, "learning_rate": 7.500000000000001e-05, "loss": 2.8184, "step": 3 }, { "epoch": 0.13, "learning_rate": 0.0001, "loss": 2.7164, "step": 4 }, { "epoch": 0.16, "learning_rate": 9.992667069255619e-05, "loss": 2.6905, "step": 5 }, { "epoch": 0.19, "learning_rate": 9.970689785771798e-05, "loss": 2.4758, "step": 6 }, { "epoch": 0.22, "learning_rate": 9.934132612707632e-05, "loss": 2.1961, "step": 7 }, { "epoch": 0.26, "learning_rate": 9.883102778550434e-05, "loss": 2.1084, "step": 8 }, { "epoch": 0.29, "learning_rate": 9.817749962596115e-05, "loss": 1.9172, "step": 9 }, { "epoch": 0.32, "learning_rate": 9.738265855914013e-05, "loss": 1.7626, "step": 10 }, { "epoch": 0.35, "learning_rate": 9.644883599083958e-05, "loss": 1.6495, "step": 11 }, { "epoch": 0.38, "learning_rate": 9.537877098354786e-05, "loss": 1.3566, "step": 12 }, { "epoch": 0.42, "learning_rate": 9.417560222230115e-05, "loss": 1.2721, "step": 13 }, { "epoch": 0.42, "eval_loss": 1.381595492362976, "eval_runtime": 1.4441, "eval_samples_per_second": 34.623, "eval_steps_per_second": 4.847, "step": 13 }, { "epoch": 0.45, "learning_rate": 9.284285880837946e-05, "loss": 1.1247, "step": 14 }, { "epoch": 0.48, "learning_rate": 9.138444990784453e-05, "loss": 1.3827, "step": 15 }, { "epoch": 0.51, "learning_rate": 8.980465328528219e-05, "loss": 1.4877, "step": 16 }, { "epoch": 0.54, "learning_rate": 8.810810275638183e-05, "loss": 1.4821, "step": 17 }, { "epoch": 0.58, "learning_rate": 8.629977459615655e-05, "loss": 1.3786, "step": 18 }, { "epoch": 0.61, "learning_rate": 8.438497294267117e-05, "loss": 1.2494, "step": 19 }, { "epoch": 0.64, "learning_rate": 8.236931423909138e-05, "loss": 1.1829, "step": 20 }, { "epoch": 0.67, "learning_rate": 8.025871075968828e-05, "loss": 1.0193, "step": 21 }, { "epoch": 0.7, "learning_rate": 7.805935326811912e-05, "loss": 1.4785, "step": 22 }, { "epoch": 0.74, "learning_rate": 7.577769285885109e-05, "loss": 1.4004, "step": 23 }, { "epoch": 0.77, "learning_rate": 7.342042203498951e-05, "loss": 1.3328, "step": 24 }, { "epoch": 0.8, "learning_rate": 7.099445507801323e-05, "loss": 1.2686, "step": 25 }, { "epoch": 0.83, "learning_rate": 6.850690776699573e-05, "loss": 1.0142, "step": 26 }, { "epoch": 0.83, "eval_loss": 1.1247260570526123, "eval_runtime": 1.4412, "eval_samples_per_second": 34.693, "eval_steps_per_second": 4.857, "step": 26 }, { "epoch": 0.86, "learning_rate": 6.5965076506799e-05, "loss": 0.9446, "step": 27 }, { "epoch": 0.9, "learning_rate": 6.337641692646106e-05, "loss": 0.8311, "step": 28 }, { "epoch": 0.93, "learning_rate": 6.0748522010551215e-05, "loss": 1.3538, "step": 29 }, { "epoch": 0.96, "learning_rate": 5.808909982763825e-05, "loss": 1.1263, "step": 30 }, { "epoch": 0.99, "learning_rate": 5.540595092119709e-05, "loss": 0.9153, "step": 31 }, { "epoch": 1.02, "learning_rate": 5.270694542927088e-05, "loss": 0.9691, "step": 32 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 1.377, "step": 33 }, { "epoch": 1.09, "learning_rate": 4.729305457072913e-05, "loss": 1.242, "step": 34 }, { "epoch": 1.12, "learning_rate": 4.4594049078802925e-05, "loss": 1.2525, "step": 35 }, { "epoch": 1.15, "learning_rate": 4.1910900172361764e-05, "loss": 1.0716, "step": 36 }, { "epoch": 1.18, "learning_rate": 3.92514779894488e-05, "loss": 0.9491, "step": 37 }, { "epoch": 1.22, "learning_rate": 3.6623583073538966e-05, "loss": 0.7949, "step": 38 }, { "epoch": 1.25, "learning_rate": 3.403492349320101e-05, "loss": 1.1087, "step": 39 }, { "epoch": 1.25, "eval_loss": 1.0872222185134888, "eval_runtime": 1.4388, "eval_samples_per_second": 34.752, "eval_steps_per_second": 4.865, "step": 39 }, { "epoch": 1.28, "learning_rate": 3.149309223300428e-05, "loss": 1.3412, "step": 40 }, { "epoch": 1.31, "learning_rate": 2.900554492198677e-05, "loss": 1.1742, "step": 41 }, { "epoch": 1.34, "learning_rate": 2.65795779650105e-05, "loss": 1.0373, "step": 42 }, { "epoch": 1.38, "learning_rate": 2.422230714114891e-05, "loss": 1.0752, "step": 43 }, { "epoch": 1.41, "learning_rate": 2.194064673188089e-05, "loss": 0.8705, "step": 44 }, { "epoch": 1.44, "learning_rate": 1.9741289240311755e-05, "loss": 0.8475, "step": 45 }, { "epoch": 1.47, "learning_rate": 1.7630685760908622e-05, "loss": 1.2784, "step": 46 }, { "epoch": 1.5, "learning_rate": 1.561502705732883e-05, "loss": 1.4262, "step": 47 }, { "epoch": 1.54, "learning_rate": 1.3700225403843469e-05, "loss": 1.1989, "step": 48 }, { "epoch": 1.57, "learning_rate": 1.1891897243618182e-05, "loss": 1.0853, "step": 49 }, { "epoch": 1.6, "learning_rate": 1.0195346714717813e-05, "loss": 0.8383, "step": 50 }, { "epoch": 1.63, "learning_rate": 8.615550092155478e-06, "loss": 0.9405, "step": 51 }, { "epoch": 1.66, "learning_rate": 7.157141191620548e-06, "loss": 0.837, "step": 52 }, { "epoch": 1.66, "eval_loss": 1.077386736869812, "eval_runtime": 1.4402, "eval_samples_per_second": 34.716, "eval_steps_per_second": 4.86, "step": 52 }, { "epoch": 1.7, "learning_rate": 5.824397777698859e-06, "loss": 1.1777, "step": 53 }, { "epoch": 1.73, "learning_rate": 4.621229016452156e-06, "loss": 1.2908, "step": 54 }, { "epoch": 1.76, "learning_rate": 3.551164009160429e-06, "loss": 1.2032, "step": 55 }, { "epoch": 1.79, "learning_rate": 2.6173414408598827e-06, "loss": 1.0568, "step": 56 }, { "epoch": 1.82, "learning_rate": 1.8225003740388547e-06, "loss": 1.029, "step": 57 }, { "epoch": 1.86, "learning_rate": 1.1689722144956671e-06, "loss": 1.0957, "step": 58 }, { "epoch": 1.89, "learning_rate": 6.58673872923693e-07, "loss": 0.7948, "step": 59 }, { "epoch": 1.92, "learning_rate": 2.9310214228202013e-07, "loss": 1.1412, "step": 60 }, { "epoch": 1.95, "learning_rate": 7.332930744380906e-08, "loss": 1.3215, "step": 61 }, { "epoch": 1.98, "learning_rate": 0.0, "loss": 1.0566, "step": 62 } ], "max_steps": 62, "num_train_epochs": 2, "total_flos": 1589370366787584.0, "trial_name": null, "trial_params": null }