{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.9984, "global_step": 6240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.2721790075302124, "eval_mse": 0.2721790075302124, "eval_runtime": 13.7261, "eval_samples_per_second": 72.854, "eval_steps_per_second": 9.107, "step": 312 }, { "epoch": 1.6, "learning_rate": 3.4640443499072686e-05, "loss": 0.2992, "step": 500 }, { "epoch": 2.0, "eval_loss": 0.24081240594387054, "eval_mse": 0.24081240594387054, "eval_runtime": 13.7912, "eval_samples_per_second": 72.51, "eval_steps_per_second": 9.064, "step": 624 }, { "epoch": 3.0, "eval_loss": 0.22090023756027222, "eval_mse": 0.22090023756027222, "eval_runtime": 13.7775, "eval_samples_per_second": 72.582, "eval_steps_per_second": 9.073, "step": 936 }, { "epoch": 3.2, "learning_rate": 3.1622983263961825e-05, "loss": 0.1095, "step": 1000 }, { "epoch": 4.0, "eval_loss": 0.18391327559947968, "eval_mse": 0.18391327559947968, "eval_runtime": 13.7422, "eval_samples_per_second": 72.769, "eval_steps_per_second": 9.096, "step": 1248 }, { "epoch": 4.81, "learning_rate": 2.860552302885096e-05, "loss": 0.0676, "step": 1500 }, { "epoch": 5.0, "eval_loss": 0.1835608184337616, "eval_mse": 0.1835608184337616, "eval_runtime": 13.7549, "eval_samples_per_second": 72.702, "eval_steps_per_second": 9.088, "step": 1560 }, { "epoch": 6.0, "eval_loss": 0.1830635964870453, "eval_mse": 0.1830635964870453, "eval_runtime": 13.799, "eval_samples_per_second": 72.469, "eval_steps_per_second": 9.059, "step": 1872 }, { "epoch": 6.41, "learning_rate": 2.5588062793740103e-05, "loss": 0.0507, "step": 2000 }, { "epoch": 7.0, "eval_loss": 0.18448784947395325, "eval_mse": 0.18448786437511444, "eval_runtime": 13.7961, "eval_samples_per_second": 72.484, "eval_steps_per_second": 9.061, "step": 2184 }, { "epoch": 8.0, "eval_loss": 0.18527822196483612, "eval_mse": 0.18527822196483612, "eval_runtime": 13.7973, "eval_samples_per_second": 72.478, "eval_steps_per_second": 9.06, "step": 2496 }, { "epoch": 8.01, "learning_rate": 2.257060255862924e-05, "loss": 0.0402, "step": 2500 }, { "epoch": 9.0, "eval_loss": 0.18943239748477936, "eval_mse": 0.18943239748477936, "eval_runtime": 13.7202, "eval_samples_per_second": 72.885, "eval_steps_per_second": 9.111, "step": 2808 }, { "epoch": 9.61, "learning_rate": 1.9553142323518382e-05, "loss": 0.0327, "step": 3000 }, { "epoch": 10.0, "eval_loss": 0.17714717984199524, "eval_mse": 0.17714717984199524, "eval_runtime": 13.7644, "eval_samples_per_second": 72.651, "eval_steps_per_second": 9.081, "step": 3120 }, { "epoch": 11.0, "eval_loss": 0.18594859540462494, "eval_mse": 0.18594858050346375, "eval_runtime": 13.6617, "eval_samples_per_second": 73.197, "eval_steps_per_second": 9.15, "step": 3432 }, { "epoch": 11.22, "learning_rate": 1.6535682088407518e-05, "loss": 0.0268, "step": 3500 }, { "epoch": 12.0, "eval_loss": 0.18873579800128937, "eval_mse": 0.18873581290245056, "eval_runtime": 13.705, "eval_samples_per_second": 72.966, "eval_steps_per_second": 9.121, "step": 3744 }, { "epoch": 12.82, "learning_rate": 1.3518221853296657e-05, "loss": 0.0211, "step": 4000 }, { "epoch": 13.0, "eval_loss": 0.19864311814308167, "eval_mse": 0.19864313304424286, "eval_runtime": 13.7757, "eval_samples_per_second": 72.592, "eval_steps_per_second": 9.074, "step": 4056 }, { "epoch": 14.0, "eval_loss": 0.17751657962799072, "eval_mse": 0.17751657962799072, "eval_runtime": 13.7818, "eval_samples_per_second": 72.56, "eval_steps_per_second": 9.07, "step": 4368 }, { "epoch": 14.42, "learning_rate": 1.0500761618185796e-05, "loss": 0.0161, "step": 4500 }, { "epoch": 15.0, "eval_loss": 0.1854146122932434, "eval_mse": 0.1854146122932434, "eval_runtime": 13.8239, "eval_samples_per_second": 72.339, "eval_steps_per_second": 9.042, "step": 4680 }, { "epoch": 16.0, "eval_loss": 0.1844572126865387, "eval_mse": 0.1844572275876999, "eval_runtime": 13.7435, "eval_samples_per_second": 72.762, "eval_steps_per_second": 9.095, "step": 4992 }, { "epoch": 16.03, "learning_rate": 7.483301383074935e-06, "loss": 0.0126, "step": 5000 }, { "epoch": 17.0, "eval_loss": 0.18561576306819916, "eval_mse": 0.18561576306819916, "eval_runtime": 13.7414, "eval_samples_per_second": 72.773, "eval_steps_per_second": 9.097, "step": 5304 }, { "epoch": 17.63, "learning_rate": 4.465841147964074e-06, "loss": 0.01, "step": 5500 }, { "epoch": 18.0, "eval_loss": 0.17724011838436127, "eval_mse": 0.17724011838436127, "eval_runtime": 13.7177, "eval_samples_per_second": 72.898, "eval_steps_per_second": 9.112, "step": 5616 }, { "epoch": 19.0, "eval_loss": 0.18078790605068207, "eval_mse": 0.18078790605068207, "eval_runtime": 13.8059, "eval_samples_per_second": 72.433, "eval_steps_per_second": 9.054, "step": 5928 }, { "epoch": 19.23, "learning_rate": 1.4483809128532133e-06, "loss": 0.008, "step": 6000 }, { "epoch": 20.0, "eval_loss": 0.18189798295497894, "eval_mse": 0.18189798295497894, "eval_runtime": 13.7884, "eval_samples_per_second": 72.524, "eval_steps_per_second": 9.066, "step": 6240 }, { "epoch": 20.0, "step": 6240, "total_flos": 2.3296341506770944e+16, "train_loss": 0.05592835318201628, "train_runtime": 6478.1274, "train_samples_per_second": 15.437, "train_steps_per_second": 0.963 } ], "max_steps": 6240, "num_train_epochs": 20, "total_flos": 2.3296341506770944e+16, "trial_name": null, "trial_params": null }