{ "best_metric": 1.4523780345916748, "best_model_checkpoint": "./results/checkpoint-64", "epoch": 0.05200081251269551, "eval_steps": 8, "global_step": 64, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002, "loss": 1.604, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.5865, "step": 8 }, { "epoch": 0.01, "eval_loss": 1.58369779586792, "eval_runtime": 33.9799, "eval_samples_per_second": 15.244, "eval_steps_per_second": 1.913, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.4024, "step": 12 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 1.3627, "step": 16 }, { "epoch": 0.01, "eval_loss": 1.503720998764038, "eval_runtime": 34.2342, "eval_samples_per_second": 15.131, "eval_steps_per_second": 1.899, "step": 16 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 1.3242, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 1.2733, "step": 24 }, { "epoch": 0.02, "eval_loss": 1.4826295375823975, "eval_runtime": 34.4234, "eval_samples_per_second": 15.048, "eval_steps_per_second": 1.888, "step": 24 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 1.507, "step": 28 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.472, "step": 32 }, { "epoch": 0.03, "eval_loss": 1.4713466167449951, "eval_runtime": 34.5227, "eval_samples_per_second": 15.005, "eval_steps_per_second": 1.883, "step": 32 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.4269, "step": 36 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.6282, "step": 40 }, { "epoch": 0.03, "eval_loss": 1.4705039262771606, "eval_runtime": 34.5472, "eval_samples_per_second": 14.994, "eval_steps_per_second": 1.881, "step": 40 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 1.6744, "step": 44 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 1.789, "step": 48 }, { "epoch": 0.04, "eval_loss": 1.4801452159881592, "eval_runtime": 34.5611, "eval_samples_per_second": 14.988, "eval_steps_per_second": 1.881, "step": 48 }, { "epoch": 0.04, "learning_rate": 0.0002, "loss": 1.7547, "step": 52 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 1.3081, "step": 56 }, { "epoch": 0.05, "eval_loss": 1.465359091758728, "eval_runtime": 34.5837, "eval_samples_per_second": 14.978, "eval_steps_per_second": 1.88, "step": 56 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 1.2313, "step": 60 }, { "epoch": 0.05, "learning_rate": 0.0002, "loss": 1.4273, "step": 64 }, { "epoch": 0.05, "eval_loss": 1.4523780345916748, "eval_runtime": 34.6395, "eval_samples_per_second": 14.954, "eval_steps_per_second": 1.876, "step": 64 } ], "logging_steps": 4, "max_steps": 64, "num_train_epochs": 1, "save_steps": 8, "total_flos": 7578875318034432.0, "trial_name": null, "trial_params": null }