{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.5, "eval_steps": 20, "global_step": 350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1e-05, "loss": 0.588, "step": 10 }, { "epoch": 0.2, "learning_rate": 2e-05, "loss": 0.5793, "step": 20 }, { "epoch": 0.2, "eval_loss": 0.5849277377128601, "eval_runtime": 13.5251, "eval_samples_per_second": 1.848, "eval_steps_per_second": 0.296, "step": 20 }, { "epoch": 0.3, "learning_rate": 3e-05, "loss": 0.5065, "step": 30 }, { "epoch": 0.4, "learning_rate": 4e-05, "loss": 0.5404, "step": 40 }, { "epoch": 0.4, "eval_loss": 0.5613898634910583, "eval_runtime": 13.3646, "eval_samples_per_second": 1.871, "eval_steps_per_second": 0.299, "step": 40 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 0.4523, "step": 50 }, { "epoch": 0.6, "learning_rate": 4.9986331433523156e-05, "loss": 0.5692, "step": 60 }, { "epoch": 0.6, "eval_loss": 0.5513718724250793, "eval_runtime": 13.3937, "eval_samples_per_second": 1.867, "eval_steps_per_second": 0.299, "step": 60 }, { "epoch": 0.7, "learning_rate": 4.994534068046937e-05, "loss": 0.5257, "step": 70 }, { "epoch": 0.8, "learning_rate": 4.9877072563625285e-05, "loss": 0.4937, "step": 80 }, { "epoch": 0.8, "eval_loss": 0.5424299836158752, "eval_runtime": 13.4443, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.298, "step": 80 }, { "epoch": 0.9, "learning_rate": 4.978160173317438e-05, "loss": 0.5617, "step": 90 }, { "epoch": 1.0, "learning_rate": 4.965903258506806e-05, "loss": 0.3592, "step": 100 }, { "epoch": 1.0, "eval_loss": 0.5260083675384521, "eval_runtime": 13.4207, "eval_samples_per_second": 1.863, "eval_steps_per_second": 0.298, "step": 100 }, { "epoch": 1.1, "learning_rate": 4.9509499146870236e-05, "loss": 0.4586, "step": 110 }, { "epoch": 1.2, "learning_rate": 4.933316493120015e-05, "loss": 0.3724, "step": 120 }, { "epoch": 1.2, "eval_loss": 0.5247484445571899, "eval_runtime": 13.4092, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 120 }, { "epoch": 1.3, "learning_rate": 4.913022275693372e-05, "loss": 0.3205, "step": 130 }, { "epoch": 1.4, "learning_rate": 4.8900894538358944e-05, "loss": 0.4962, "step": 140 }, { "epoch": 1.4, "eval_loss": 0.5248944163322449, "eval_runtime": 13.4511, "eval_samples_per_second": 1.859, "eval_steps_per_second": 0.297, "step": 140 }, { "epoch": 1.5, "learning_rate": 4.864543104251587e-05, "loss": 0.3324, "step": 150 }, { "epoch": 1.6, "learning_rate": 4.8364111614986527e-05, "loss": 0.4781, "step": 160 }, { "epoch": 1.6, "eval_loss": 0.5312905311584473, "eval_runtime": 13.4112, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 160 }, { "epoch": 1.7, "learning_rate": 4.805724387443462e-05, "loss": 0.3429, "step": 170 }, { "epoch": 1.8, "learning_rate": 4.7725163376229064e-05, "loss": 0.3504, "step": 180 }, { "epoch": 1.8, "eval_loss": 0.5292657017707825, "eval_runtime": 13.3837, "eval_samples_per_second": 1.868, "eval_steps_per_second": 0.299, "step": 180 }, { "epoch": 1.9, "learning_rate": 4.736823324551909e-05, "loss": 0.4843, "step": 190 }, { "epoch": 2.0, "learning_rate": 4.698684378016222e-05, "loss": 0.2449, "step": 200 }, { "epoch": 2.0, "eval_loss": 0.5281097292900085, "eval_runtime": 13.438, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.298, "step": 200 }, { "epoch": 2.1, "learning_rate": 4.6581412023939354e-05, "loss": 0.3535, "step": 210 }, { "epoch": 2.2, "learning_rate": 4.6152381310523387e-05, "loss": 0.2264, "step": 220 }, { "epoch": 2.2, "eval_loss": 0.5974144339561462, "eval_runtime": 13.4417, "eval_samples_per_second": 1.86, "eval_steps_per_second": 0.298, "step": 220 }, { "epoch": 2.3, "learning_rate": 4.5700220778700504e-05, "loss": 0.2303, "step": 230 }, { "epoch": 2.4, "learning_rate": 4.522542485937369e-05, "loss": 0.3395, "step": 240 }, { "epoch": 2.4, "eval_loss": 0.5777361989021301, "eval_runtime": 13.414, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 240 }, { "epoch": 2.5, "learning_rate": 4.4728512734909844e-05, "loss": 0.1651, "step": 250 }, { "epoch": 2.6, "learning_rate": 4.421002777142148e-05, "loss": 0.3093, "step": 260 }, { "epoch": 2.6, "eval_loss": 0.5854852199554443, "eval_runtime": 13.4001, "eval_samples_per_second": 1.866, "eval_steps_per_second": 0.299, "step": 260 }, { "epoch": 2.7, "learning_rate": 4.367053692460385e-05, "loss": 0.2512, "step": 270 }, { "epoch": 2.8, "learning_rate": 4.311063011977723e-05, "loss": 0.2054, "step": 280 }, { "epoch": 2.8, "eval_loss": 0.5999884605407715, "eval_runtime": 13.4618, "eval_samples_per_second": 1.857, "eval_steps_per_second": 0.297, "step": 280 }, { "epoch": 2.9, "learning_rate": 4.2530919606812216e-05, "loss": 0.3547, "step": 290 }, { "epoch": 3.0, "learning_rate": 4.193203929064353e-05, "loss": 0.1739, "step": 300 }, { "epoch": 3.0, "eval_loss": 0.5840221047401428, "eval_runtime": 13.4097, "eval_samples_per_second": 1.864, "eval_steps_per_second": 0.298, "step": 300 }, { "epoch": 3.1, "learning_rate": 4.131464403810422e-05, "loss": 0.1787, "step": 310 }, { "epoch": 3.2, "learning_rate": 4.067940896183843e-05, "loss": 0.1165, "step": 320 }, { "epoch": 3.2, "eval_loss": 0.6706953644752502, "eval_runtime": 13.4189, "eval_samples_per_second": 1.863, "eval_steps_per_second": 0.298, "step": 320 }, { "epoch": 3.3, "learning_rate": 4.002702868207563e-05, "loss": 0.1477, "step": 330 }, { "epoch": 3.4, "learning_rate": 3.935821656707359e-05, "loss": 0.2011, "step": 340 }, { "epoch": 3.4, "eval_loss": 0.6934244632720947, "eval_runtime": 13.403, "eval_samples_per_second": 1.865, "eval_steps_per_second": 0.298, "step": 340 }, { "epoch": 3.5, "learning_rate": 3.867370395306068e-05, "loss": 0.1066, "step": 350 } ], "logging_steps": 10, "max_steps": 1000, "num_train_epochs": 10, "save_steps": 50, "total_flos": 4.924285778959565e+16, "trial_name": null, "trial_params": null }