{ "best_metric": 0.8627450980392157, "best_model_checkpoint": "vit-base-patch16-224-U8-10b\\checkpoint-160", "epoch": 10.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "learning_rate": 5.5e-05, "loss": 1.3694, "step": 10 }, { "epoch": 1.0, "learning_rate": 5.210526315789474e-05, "loss": 1.2473, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.5882352941176471, "eval_loss": 1.1670756340026855, "eval_runtime": 0.9807, "eval_samples_per_second": 52.002, "eval_steps_per_second": 2.039, "step": 20 }, { "epoch": 1.5, "learning_rate": 4.921052631578948e-05, "loss": 1.1179, "step": 30 }, { "epoch": 2.0, "learning_rate": 4.631578947368421e-05, "loss": 0.955, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.6470588235294118, "eval_loss": 0.9392386078834534, "eval_runtime": 0.9523, "eval_samples_per_second": 53.557, "eval_steps_per_second": 2.1, "step": 40 }, { "epoch": 2.5, "learning_rate": 4.342105263157895e-05, "loss": 0.8458, "step": 50 }, { "epoch": 3.0, "learning_rate": 4.0526315789473684e-05, "loss": 0.735, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.6862745098039216, "eval_loss": 0.7247381210327148, "eval_runtime": 0.8402, "eval_samples_per_second": 60.7, "eval_steps_per_second": 2.38, "step": 60 }, { "epoch": 3.5, "learning_rate": 3.7631578947368425e-05, "loss": 0.6208, "step": 70 }, { "epoch": 4.0, "learning_rate": 3.473684210526316e-05, "loss": 0.5341, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.5976751446723938, "eval_runtime": 0.8153, "eval_samples_per_second": 62.551, "eval_steps_per_second": 2.453, "step": 80 }, { "epoch": 4.5, "learning_rate": 3.1842105263157895e-05, "loss": 0.4512, "step": 90 }, { "epoch": 5.0, "learning_rate": 2.894736842105263e-05, "loss": 0.3864, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 0.6555898785591125, "eval_runtime": 0.8565, "eval_samples_per_second": 59.545, "eval_steps_per_second": 2.335, "step": 100 }, { "epoch": 5.5, "learning_rate": 2.605263157894737e-05, "loss": 0.3234, "step": 110 }, { "epoch": 6.0, "learning_rate": 2.3157894736842103e-05, "loss": 0.2837, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.7254901960784313, "eval_loss": 0.6780798435211182, "eval_runtime": 0.9582, "eval_samples_per_second": 53.226, "eval_steps_per_second": 2.087, "step": 120 }, { "epoch": 6.5, "learning_rate": 2.0263157894736842e-05, "loss": 0.2643, "step": 130 }, { "epoch": 7.0, "learning_rate": 1.736842105263158e-05, "loss": 0.2332, "step": 140 }, { "epoch": 7.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 0.5419377088546753, "eval_runtime": 0.8658, "eval_samples_per_second": 58.905, "eval_steps_per_second": 2.31, "step": 140 }, { "epoch": 7.5, "learning_rate": 1.4473684210526315e-05, "loss": 0.2328, "step": 150 }, { "epoch": 8.0, "learning_rate": 1.1578947368421052e-05, "loss": 0.1974, "step": 160 }, { "epoch": 8.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 0.5349088907241821, "eval_runtime": 0.7975, "eval_samples_per_second": 63.951, "eval_steps_per_second": 2.508, "step": 160 }, { "epoch": 8.5, "learning_rate": 8.68421052631579e-06, "loss": 0.1871, "step": 170 }, { "epoch": 9.0, "learning_rate": 5.789473684210526e-06, "loss": 0.1857, "step": 180 }, { "epoch": 9.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.5605680346488953, "eval_runtime": 0.8589, "eval_samples_per_second": 59.376, "eval_steps_per_second": 2.328, "step": 180 }, { "epoch": 9.5, "learning_rate": 2.894736842105263e-06, "loss": 0.1915, "step": 190 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.1907, "step": 200 }, { "epoch": 10.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 0.48754531145095825, "eval_runtime": 0.8369, "eval_samples_per_second": 60.941, "eval_steps_per_second": 2.39, "step": 200 }, { "epoch": 10.0, "step": 200, "total_flos": 1.9683318186491904e+18, "train_loss": 0.527636388540268, "train_runtime": 410.4734, "train_samples_per_second": 61.88, "train_steps_per_second": 0.487 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.9683318186491904e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }