{ "best_metric": 0.6430063247680664, "best_model_checkpoint": "output/checkpoint-50", "epoch": 0.5611784747970738, "eval_steps": 50, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.444444444444445e-07, "loss": 0.5542, "step": 1 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.554, "step": 50 }, { "epoch": 0.04, "eval_accuracy": 0.6626865671641791, "eval_loss": 0.6430063247680664, "eval_runtime": 61.7692, "eval_samples_per_second": 5.423, "eval_steps_per_second": 1.36, "step": 50 }, { "epoch": 0.08, "learning_rate": 4.222222222222222e-05, "loss": 0.5957, "step": 100 }, { "epoch": 0.08, "eval_accuracy": 0.6686567164179105, "eval_loss": 0.6705919504165649, "eval_runtime": 60.4388, "eval_samples_per_second": 5.543, "eval_steps_per_second": 1.39, "step": 100 }, { "epoch": 0.12, "learning_rate": 6.444444444444446e-05, "loss": 0.5668, "step": 150 }, { "epoch": 0.12, "eval_accuracy": 0.6805970149253732, "eval_loss": 0.7224913835525513, "eval_runtime": 56.4882, "eval_samples_per_second": 5.93, "eval_steps_per_second": 1.487, "step": 150 }, { "epoch": 0.16, "learning_rate": 8.666666666666667e-05, "loss": 0.5626, "step": 200 }, { "epoch": 0.16, "eval_accuracy": 0.6746268656716418, "eval_loss": 0.7115103006362915, "eval_runtime": 56.506, "eval_samples_per_second": 5.929, "eval_steps_per_second": 1.487, "step": 200 }, { "epoch": 0.2, "learning_rate": 9.80430528375734e-05, "loss": 0.5604, "step": 250 }, { "epoch": 0.2, "eval_accuracy": 0.6746268656716418, "eval_loss": 0.700947105884552, "eval_runtime": 56.4927, "eval_samples_per_second": 5.93, "eval_steps_per_second": 1.487, "step": 250 }, { "epoch": 0.24, "learning_rate": 9.315068493150684e-05, "loss": 0.5675, "step": 300 }, { "epoch": 0.24, "eval_accuracy": 0.6597014925373135, "eval_loss": 0.662155032157898, "eval_runtime": 56.4903, "eval_samples_per_second": 5.93, "eval_steps_per_second": 1.487, "step": 300 }, { "epoch": 0.28, "learning_rate": 8.825831702544032e-05, "loss": 0.5725, "step": 350 }, { "epoch": 0.28, "eval_accuracy": 0.6567164179104478, "eval_loss": 0.6746364831924438, "eval_runtime": 56.482, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 350 }, { "epoch": 0.32, "learning_rate": 8.336594911937378e-05, "loss": 0.5884, "step": 400 }, { "epoch": 0.32, "eval_accuracy": 0.6656716417910448, "eval_loss": 0.7047849893569946, "eval_runtime": 56.4873, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 400 }, { "epoch": 0.36, "learning_rate": 7.847358121330725e-05, "loss": 0.5567, "step": 450 }, { "epoch": 0.36, "eval_accuracy": 0.6477611940298508, "eval_loss": 0.6832321286201477, "eval_runtime": 56.4814, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 450 }, { "epoch": 0.4, "learning_rate": 7.35812133072407e-05, "loss": 0.5723, "step": 500 }, { "epoch": 0.4, "eval_accuracy": 0.6955223880597015, "eval_loss": 0.7437570691108704, "eval_runtime": 56.4677, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 500 }, { "epoch": 0.44, "learning_rate": 6.868884540117417e-05, "loss": 0.5908, "step": 550 }, { "epoch": 0.44, "eval_accuracy": 0.6238805970149254, "eval_loss": 0.6612433791160583, "eval_runtime": 56.4799, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 550 }, { "epoch": 0.48, "learning_rate": 6.379647749510763e-05, "loss": 0.5681, "step": 600 }, { "epoch": 0.48, "eval_accuracy": 0.6507462686567164, "eval_loss": 0.6833732724189758, "eval_runtime": 56.4689, "eval_samples_per_second": 5.932, "eval_steps_per_second": 1.488, "step": 600 }, { "epoch": 0.52, "learning_rate": 5.89041095890411e-05, "loss": 0.5782, "step": 650 }, { "epoch": 0.52, "eval_accuracy": 0.6507462686567164, "eval_loss": 0.67350834608078, "eval_runtime": 56.4637, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 650 }, { "epoch": 0.56, "learning_rate": 5.401174168297456e-05, "loss": 0.569, "step": 700 }, { "epoch": 0.56, "eval_accuracy": 0.6686567164179105, "eval_loss": 0.7085319757461548, "eval_runtime": 56.4627, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 700 } ], "logging_steps": 50, "max_steps": 1247, "num_train_epochs": 1, "save_steps": 50, "total_flos": 2.6108184141901824e+17, "trial_name": null, "trial_params": null }