{ "best_metric": 0.07713755965232849, "best_model_checkpoint": "/nasty/data/tpid/vizwiz/ViTGPT2_VW/checkpoint-14000", "epoch": 0.04404233526800956, "global_step": 14645, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9863839666769097e-05, "loss": 0.1256, "step": 1000 }, { "epoch": 0.03, "eval_loss": 0.09279213845729828, "eval_runtime": 1047.3083, "eval_samples_per_second": 37.0, "eval_steps_per_second": 9.25, "step": 1000 }, { "epoch": 0.07, "learning_rate": 1.9727269623408106e-05, "loss": 0.0947, "step": 2000 }, { "epoch": 0.07, "eval_loss": 0.0897236242890358, "eval_runtime": 864.4123, "eval_samples_per_second": 44.828, "eval_steps_per_second": 11.208, "step": 2000 }, { "epoch": 0.1, "learning_rate": 1.9590699580047118e-05, "loss": 0.0889, "step": 3000 }, { "epoch": 0.1, "eval_loss": 0.08590871840715408, "eval_runtime": 857.9482, "eval_samples_per_second": 45.166, "eval_steps_per_second": 11.292, "step": 3000 }, { "epoch": 0.14, "learning_rate": 1.945412953668613e-05, "loss": 0.0888, "step": 4000 }, { "epoch": 0.14, "eval_loss": 0.08419814705848694, "eval_runtime": 866.4231, "eval_samples_per_second": 44.724, "eval_steps_per_second": 11.182, "step": 4000 }, { "epoch": 0.17, "learning_rate": 1.931755949332514e-05, "loss": 0.0866, "step": 5000 }, { "epoch": 0.17, "eval_loss": 0.08309577405452728, "eval_runtime": 864.2485, "eval_samples_per_second": 44.837, "eval_steps_per_second": 11.21, "step": 5000 }, { "epoch": 0.2, "learning_rate": 1.918098944996415e-05, "loss": 0.0852, "step": 6000 }, { "epoch": 0.2, "eval_loss": 0.08189540356397629, "eval_runtime": 865.5776, "eval_samples_per_second": 44.768, "eval_steps_per_second": 11.193, "step": 6000 }, { "epoch": 0.24, "learning_rate": 1.9044419406603162e-05, "loss": 0.0833, "step": 7000 }, { "epoch": 0.24, "eval_loss": 0.08095835894346237, "eval_runtime": 864.7144, "eval_samples_per_second": 44.812, "eval_steps_per_second": 11.204, "step": 7000 }, { "epoch": 0.27, "learning_rate": 1.8907849363242174e-05, "loss": 0.0835, "step": 8000 }, { "epoch": 0.27, "eval_loss": 0.08023638278245926, "eval_runtime": 866.728, "eval_samples_per_second": 44.708, "eval_steps_per_second": 11.178, "step": 8000 }, { "epoch": 0.31, "learning_rate": 1.8771279319881186e-05, "loss": 0.081, "step": 9000 }, { "epoch": 0.31, "eval_loss": 0.07963699847459793, "eval_runtime": 865.0337, "eval_samples_per_second": 44.796, "eval_steps_per_second": 11.2, "step": 9000 }, { "epoch": 0.34, "learning_rate": 1.86347092765202e-05, "loss": 0.0803, "step": 10000 }, { "epoch": 0.34, "eval_loss": 0.07886938005685806, "eval_runtime": 864.2901, "eval_samples_per_second": 44.834, "eval_steps_per_second": 11.209, "step": 10000 }, { "epoch": 0.38, "learning_rate": 1.849813923315921e-05, "loss": 0.0814, "step": 11000 }, { "epoch": 0.38, "eval_loss": 0.07851768285036087, "eval_runtime": 863.0619, "eval_samples_per_second": 44.898, "eval_steps_per_second": 11.225, "step": 11000 }, { "epoch": 0.41, "learning_rate": 1.836156918979822e-05, "loss": 0.0799, "step": 12000 }, { "epoch": 0.41, "eval_loss": 0.0779803916811943, "eval_runtime": 862.8886, "eval_samples_per_second": 44.907, "eval_steps_per_second": 11.227, "step": 12000 }, { "epoch": 0.44, "learning_rate": 1.822513571648059e-05, "loss": 0.0786, "step": 13000 }, { "epoch": 0.44, "eval_loss": 0.07762513309717178, "eval_runtime": 861.7094, "eval_samples_per_second": 44.969, "eval_steps_per_second": 11.243, "step": 13000 }, { "epoch": 0.48, "learning_rate": 1.8088565673119603e-05, "loss": 0.0796, "step": 14000 }, { "epoch": 0.48, "eval_loss": 0.07713755965232849, "eval_runtime": 866.4253, "eval_samples_per_second": 44.724, "eval_steps_per_second": 11.182, "step": 14000 }, { "epoch": 0.04, "step": 14645, "total_flos": 7.904602466056929e+18, "train_loss": 0.0033295607640092873, "train_runtime": 303.4327, "train_samples_per_second": 386.099, "train_steps_per_second": 48.264 } ], "max_steps": 14645, "num_train_epochs": 1, "total_flos": 7.904602466056929e+18, "trial_name": null, "trial_params": null }