{ "best_metric": 0.6430063247680664, "best_model_checkpoint": "output/checkpoint-50", "epoch": 0.8016835354243912, "eval_steps": 50, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.444444444444445e-07, "loss": 0.5542, "step": 1 }, { "epoch": 0.04, "learning_rate": 2e-05, "loss": 0.554, "step": 50 }, { "epoch": 0.04, "eval_accuracy": 0.6626865671641791, "eval_loss": 0.6430063247680664, "eval_runtime": 61.7692, "eval_samples_per_second": 5.423, "eval_steps_per_second": 1.36, "step": 50 }, { "epoch": 0.08, "learning_rate": 4.222222222222222e-05, "loss": 0.5957, "step": 100 }, { "epoch": 0.08, "eval_accuracy": 0.6686567164179105, "eval_loss": 0.6705919504165649, "eval_runtime": 60.4388, "eval_samples_per_second": 5.543, "eval_steps_per_second": 1.39, "step": 100 }, { "epoch": 0.12, "learning_rate": 6.444444444444446e-05, "loss": 0.5668, "step": 150 }, { "epoch": 0.12, "eval_accuracy": 0.6805970149253732, "eval_loss": 0.7224913835525513, "eval_runtime": 56.4882, "eval_samples_per_second": 5.93, "eval_steps_per_second": 1.487, "step": 150 }, { "epoch": 0.16, "learning_rate": 8.666666666666667e-05, "loss": 0.5626, "step": 200 }, { "epoch": 0.16, "eval_accuracy": 0.6746268656716418, "eval_loss": 0.7115103006362915, "eval_runtime": 56.506, "eval_samples_per_second": 5.929, "eval_steps_per_second": 1.487, "step": 200 }, { "epoch": 0.2, "learning_rate": 9.80430528375734e-05, "loss": 0.5604, "step": 250 }, { "epoch": 0.2, "eval_accuracy": 0.6746268656716418, "eval_loss": 0.700947105884552, "eval_runtime": 56.4927, "eval_samples_per_second": 5.93, "eval_steps_per_second": 1.487, "step": 250 }, { "epoch": 0.24, "learning_rate": 9.315068493150684e-05, "loss": 0.5675, "step": 300 }, { "epoch": 0.24, "eval_accuracy": 0.6597014925373135, "eval_loss": 0.662155032157898, "eval_runtime": 56.4903, "eval_samples_per_second": 5.93, "eval_steps_per_second": 1.487, "step": 300 }, { "epoch": 0.28, "learning_rate": 8.825831702544032e-05, "loss": 0.5725, "step": 350 }, { "epoch": 0.28, "eval_accuracy": 0.6567164179104478, "eval_loss": 0.6746364831924438, "eval_runtime": 56.482, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 350 }, { "epoch": 0.32, "learning_rate": 8.336594911937378e-05, "loss": 0.5884, "step": 400 }, { "epoch": 0.32, "eval_accuracy": 0.6656716417910448, "eval_loss": 0.7047849893569946, "eval_runtime": 56.4873, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 400 }, { "epoch": 0.36, "learning_rate": 7.847358121330725e-05, "loss": 0.5567, "step": 450 }, { "epoch": 0.36, "eval_accuracy": 0.6477611940298508, "eval_loss": 0.6832321286201477, "eval_runtime": 56.4814, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 450 }, { "epoch": 0.4, "learning_rate": 7.35812133072407e-05, "loss": 0.5723, "step": 500 }, { "epoch": 0.4, "eval_accuracy": 0.6955223880597015, "eval_loss": 0.7437570691108704, "eval_runtime": 56.4677, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 500 }, { "epoch": 0.44, "learning_rate": 6.868884540117417e-05, "loss": 0.5908, "step": 550 }, { "epoch": 0.44, "eval_accuracy": 0.6238805970149254, "eval_loss": 0.6612433791160583, "eval_runtime": 56.4799, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 550 }, { "epoch": 0.48, "learning_rate": 6.379647749510763e-05, "loss": 0.5681, "step": 600 }, { "epoch": 0.48, "eval_accuracy": 0.6507462686567164, "eval_loss": 0.6833732724189758, "eval_runtime": 56.4689, "eval_samples_per_second": 5.932, "eval_steps_per_second": 1.488, "step": 600 }, { "epoch": 0.52, "learning_rate": 5.89041095890411e-05, "loss": 0.5782, "step": 650 }, { "epoch": 0.52, "eval_accuracy": 0.6507462686567164, "eval_loss": 0.67350834608078, "eval_runtime": 56.4637, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 650 }, { "epoch": 0.56, "learning_rate": 5.401174168297456e-05, "loss": 0.569, "step": 700 }, { "epoch": 0.56, "eval_accuracy": 0.6686567164179105, "eval_loss": 0.7085319757461548, "eval_runtime": 56.4627, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 700 }, { "epoch": 0.6, "learning_rate": 4.911937377690802e-05, "loss": 0.5661, "step": 750 }, { "epoch": 0.6, "eval_accuracy": 0.6835820895522388, "eval_loss": 0.7564846277236938, "eval_runtime": 56.4648, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 750 }, { "epoch": 0.64, "learning_rate": 4.422700587084149e-05, "loss": 0.5675, "step": 800 }, { "epoch": 0.64, "eval_accuracy": 0.6417910447761194, "eval_loss": 0.6759604215621948, "eval_runtime": 56.4652, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 800 }, { "epoch": 0.68, "learning_rate": 3.933463796477495e-05, "loss": 0.5611, "step": 850 }, { "epoch": 0.68, "eval_accuracy": 0.6477611940298508, "eval_loss": 0.6798116564750671, "eval_runtime": 56.4714, "eval_samples_per_second": 5.932, "eval_steps_per_second": 1.487, "step": 850 }, { "epoch": 0.72, "learning_rate": 3.4442270058708414e-05, "loss": 0.5664, "step": 900 }, { "epoch": 0.72, "eval_accuracy": 0.6656716417910448, "eval_loss": 0.7185413837432861, "eval_runtime": 56.4811, "eval_samples_per_second": 5.931, "eval_steps_per_second": 1.487, "step": 900 }, { "epoch": 0.76, "learning_rate": 2.9549902152641878e-05, "loss": 0.5765, "step": 950 }, { "epoch": 0.76, "eval_accuracy": 0.6417910447761194, "eval_loss": 0.6795146465301514, "eval_runtime": 56.4589, "eval_samples_per_second": 5.934, "eval_steps_per_second": 1.488, "step": 950 }, { "epoch": 0.8, "learning_rate": 2.4657534246575342e-05, "loss": 0.555, "step": 1000 }, { "epoch": 0.8, "eval_accuracy": 0.6298507462686567, "eval_loss": 0.677854061126709, "eval_runtime": 56.4673, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 1000 } ], "logging_steps": 50, "max_steps": 1247, "num_train_epochs": 1, "save_steps": 50, "total_flos": 3.7335460865581056e+17, "trial_name": null, "trial_params": null }