{ "best_metric": 0.4778192639350891, "best_model_checkpoint": "./vit-lr-0.0001/checkpoint-642", "epoch": 12.0, "eval_steps": 500, "global_step": 3852, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.167571067810059, "learning_rate": 9.997548148581541e-05, "loss": 0.6718, "step": 321 }, { "epoch": 1.0, "eval_accuracy": 0.8242024965325936, "eval_f1": 0.8162630829948523, "eval_loss": 0.4934074282646179, "eval_precision": 0.8254417232097437, "eval_recall": 0.8242024965325936, "eval_runtime": 37.6292, "eval_samples_per_second": 76.643, "eval_steps_per_second": 9.594, "step": 321 }, { "epoch": 2.0, "grad_norm": 6.953319072723389, "learning_rate": 9.990164344447551e-05, "loss": 0.4045, "step": 642 }, { "epoch": 2.0, "eval_accuracy": 0.8311373092926491, "eval_f1": 0.8268544179848872, "eval_loss": 0.4778192639350891, "eval_precision": 0.8433264041291242, "eval_recall": 0.8311373092926491, "eval_runtime": 37.5244, "eval_samples_per_second": 76.857, "eval_steps_per_second": 9.62, "step": 642 }, { "epoch": 3.0, "grad_norm": 7.354413986206055, "learning_rate": 9.977855850575433e-05, "loss": 0.2419, "step": 963 }, { "epoch": 3.0, "eval_accuracy": 0.840499306518724, "eval_f1": 0.8409562025706393, "eval_loss": 0.5132780075073242, "eval_precision": 0.8524873378932092, "eval_recall": 0.840499306518724, "eval_runtime": 36.8758, "eval_samples_per_second": 78.209, "eval_steps_per_second": 9.79, "step": 963 }, { "epoch": 4.0, "grad_norm": 0.17630255222320557, "learning_rate": 9.960634813962617e-05, "loss": 0.1267, "step": 1284 }, { "epoch": 4.0, "eval_accuracy": 0.8495145631067961, "eval_f1": 0.8447806876150558, "eval_loss": 0.6153807044029236, "eval_precision": 0.8490665425851771, "eval_recall": 0.8495145631067961, "eval_runtime": 37.1161, "eval_samples_per_second": 77.702, "eval_steps_per_second": 9.726, "step": 1284 }, { "epoch": 5.0, "grad_norm": 0.5389394760131836, "learning_rate": 9.938518229693118e-05, "loss": 0.0733, "step": 1605 }, { "epoch": 5.0, "eval_accuracy": 0.8422330097087378, "eval_f1": 0.8361023278787632, "eval_loss": 0.7844527959823608, "eval_precision": 0.8420613182520028, "eval_recall": 0.8422330097087378, "eval_runtime": 37.3092, "eval_samples_per_second": 77.3, "eval_steps_per_second": 9.676, "step": 1605 }, { "epoch": 6.0, "grad_norm": 24.989282608032227, "learning_rate": 9.911527924165445e-05, "loss": 0.0446, "step": 1926 }, { "epoch": 6.0, "eval_accuracy": 0.8470873786407767, "eval_f1": 0.840751584936853, "eval_loss": 0.8766722679138184, "eval_precision": 0.8407202080357687, "eval_recall": 0.8470873786407767, "eval_runtime": 37.9466, "eval_samples_per_second": 76.002, "eval_steps_per_second": 9.513, "step": 1926 }, { "epoch": 7.0, "grad_norm": 0.16091497242450714, "learning_rate": 9.879690533552573e-05, "loss": 0.0523, "step": 2247 }, { "epoch": 7.0, "eval_accuracy": 0.84500693481276, "eval_f1": 0.8406651419661996, "eval_loss": 0.8674203753471375, "eval_precision": 0.8491745633887275, "eval_recall": 0.84500693481276, "eval_runtime": 37.6505, "eval_samples_per_second": 76.599, "eval_steps_per_second": 9.588, "step": 2247 }, { "epoch": 8.0, "grad_norm": 0.0028496491722762585, "learning_rate": 9.843159102999166e-05, "loss": 0.0388, "step": 2568 }, { "epoch": 8.0, "eval_accuracy": 0.8398058252427184, "eval_f1": 0.8387317635385871, "eval_loss": 0.9753792881965637, "eval_precision": 0.8565844319804308, "eval_recall": 0.8398058252427184, "eval_runtime": 37.8044, "eval_samples_per_second": 76.287, "eval_steps_per_second": 9.549, "step": 2568 }, { "epoch": 9.0, "grad_norm": 0.0019187598954886198, "learning_rate": 9.801741382013225e-05, "loss": 0.0402, "step": 2889 }, { "epoch": 9.0, "eval_accuracy": 0.8491678224687933, "eval_f1": 0.8461066193854649, "eval_loss": 0.9369620084762573, "eval_precision": 0.8547133286785668, "eval_recall": 0.8491678224687933, "eval_runtime": 37.5745, "eval_samples_per_second": 76.754, "eval_steps_per_second": 9.608, "step": 2889 }, { "epoch": 10.0, "grad_norm": 0.00408038217574358, "learning_rate": 9.755584922004499e-05, "loss": 0.0283, "step": 3210 }, { "epoch": 10.0, "eval_accuracy": 0.8509015256588072, "eval_f1": 0.8482666902922076, "eval_loss": 0.9217829704284668, "eval_precision": 0.8495520865912046, "eval_recall": 0.8509015256588072, "eval_runtime": 37.6867, "eval_samples_per_second": 76.526, "eval_steps_per_second": 9.579, "step": 3210 }, { "epoch": 11.0, "grad_norm": 2.3583085536956787, "learning_rate": 9.70473527382648e-05, "loss": 0.0451, "step": 3531 }, { "epoch": 11.0, "eval_accuracy": 0.8474341192787794, "eval_f1": 0.8401062715209746, "eval_loss": 0.987198531627655, "eval_precision": 0.8400167652295873, "eval_recall": 0.8474341192787794, "eval_runtime": 37.5611, "eval_samples_per_second": 76.782, "eval_steps_per_second": 9.611, "step": 3531 }, { "epoch": 12.0, "grad_norm": 11.217573165893555, "learning_rate": 9.649242619942769e-05, "loss": 0.0549, "step": 3852 }, { "epoch": 12.0, "eval_accuracy": 0.8457004160887656, "eval_f1": 0.8415779150905001, "eval_loss": 1.0202850103378296, "eval_precision": 0.8487681253829549, "eval_recall": 0.8457004160887656, "eval_runtime": 37.4542, "eval_samples_per_second": 77.001, "eval_steps_per_second": 9.638, "step": 3852 }, { "epoch": 12.0, "step": 3852, "total_flos": 4.768760767819088e+18, "train_loss": 0.15186076372211968, "train_runtime": 1747.1919, "train_samples_per_second": 293.5, "train_steps_per_second": 18.372 } ], "logging_steps": 500, "max_steps": 32100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 4.768760767819088e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }