{ "best_metric": 0.9015151515151515, "best_model_checkpoint": "dit-base-rvlcdip-finetuned-grp-actual/checkpoint-112", "epoch": 6.72, "eval_steps": 500, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 3.846153846153846e-05, "loss": 0.8692, "step": 10 }, { "epoch": 0.96, "eval_accuracy": 0.8560606060606061, "eval_loss": 0.6972441673278809, "eval_runtime": 15.3616, "eval_samples_per_second": 17.186, "eval_steps_per_second": 0.586, "step": 18 }, { "epoch": 1.07, "learning_rate": 4.690265486725664e-05, "loss": 0.7995, "step": 20 }, { "epoch": 1.6, "learning_rate": 4.247787610619469e-05, "loss": 0.7348, "step": 30 }, { "epoch": 1.97, "eval_accuracy": 0.8598484848484849, "eval_loss": 0.6350475549697876, "eval_runtime": 16.1781, "eval_samples_per_second": 16.318, "eval_steps_per_second": 0.556, "step": 37 }, { "epoch": 2.13, "learning_rate": 3.8053097345132744e-05, "loss": 0.6855, "step": 40 }, { "epoch": 2.67, "learning_rate": 3.3628318584070804e-05, "loss": 0.6655, "step": 50 }, { "epoch": 2.99, "eval_accuracy": 0.8712121212121212, "eval_loss": 0.533867359161377, "eval_runtime": 15.3125, "eval_samples_per_second": 17.241, "eval_steps_per_second": 0.588, "step": 56 }, { "epoch": 3.2, "learning_rate": 2.9203539823008852e-05, "loss": 0.7083, "step": 60 }, { "epoch": 3.73, "learning_rate": 2.4778761061946905e-05, "loss": 0.7167, "step": 70 }, { "epoch": 4.0, "eval_accuracy": 0.8901515151515151, "eval_loss": 0.5045967102050781, "eval_runtime": 15.2445, "eval_samples_per_second": 17.318, "eval_steps_per_second": 0.59, "step": 75 }, { "epoch": 4.27, "learning_rate": 2.0353982300884957e-05, "loss": 0.6926, "step": 80 }, { "epoch": 4.8, "learning_rate": 1.592920353982301e-05, "loss": 0.694, "step": 90 }, { "epoch": 4.96, "eval_accuracy": 0.8863636363636364, "eval_loss": 0.5026077628135681, "eval_runtime": 15.2119, "eval_samples_per_second": 17.355, "eval_steps_per_second": 0.592, "step": 93 }, { "epoch": 5.33, "learning_rate": 1.1504424778761062e-05, "loss": 0.6366, "step": 100 }, { "epoch": 5.87, "learning_rate": 7.079646017699115e-06, "loss": 0.6638, "step": 110 }, { "epoch": 5.97, "eval_accuracy": 0.9015151515151515, "eval_loss": 0.46005359292030334, "eval_runtime": 15.7633, "eval_samples_per_second": 16.748, "eval_steps_per_second": 0.571, "step": 112 }, { "epoch": 6.4, "learning_rate": 2.6548672566371683e-06, "loss": 0.6618, "step": 120 }, { "epoch": 6.72, "eval_accuracy": 0.8977272727272727, "eval_loss": 0.45817553997039795, "eval_runtime": 17.722, "eval_samples_per_second": 14.897, "eval_steps_per_second": 0.508, "step": 126 }, { "epoch": 6.72, "step": 126, "total_flos": 1.2388649195611423e+18, "train_loss": 0.7077309669010223, "train_runtime": 1389.7418, "train_samples_per_second": 11.968, "train_steps_per_second": 0.091 } ], "logging_steps": 10, "max_steps": 126, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1.2388649195611423e+18, "trial_name": null, "trial_params": null }