{ "best_metric": 0.3921276926994324, "best_model_checkpoint": "./vit-base-3e-5-randaug/checkpoint-2750", "epoch": 10.0, "eval_steps": 500, "global_step": 2750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36, "grad_norm": 4.155110836029053, "learning_rate": 2.9902226030228252e-05, "loss": 2.7863, "step": 100 }, { "epoch": 0.73, "grad_norm": 4.886130332946777, "learning_rate": 2.9610178754135005e-05, "loss": 1.6815, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.7737574552683897, "eval_loss": 0.9074916839599609, "eval_runtime": 54.9169, "eval_samples_per_second": 45.796, "eval_steps_per_second": 0.728, "step": 275 }, { "epoch": 1.09, "grad_norm": 4.915349960327148, "learning_rate": 2.9127665454592872e-05, "loss": 1.2844, "step": 300 }, { "epoch": 1.45, "grad_norm": 4.22341775894165, "learning_rate": 2.8460976430370375e-05, "loss": 1.06, "step": 400 }, { "epoch": 1.82, "grad_norm": 4.15113639831543, "learning_rate": 2.7618802992467718e-05, "loss": 0.9759, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.8500994035785289, "eval_loss": 0.5867449641227722, "eval_runtime": 54.5476, "eval_samples_per_second": 46.106, "eval_steps_per_second": 0.733, "step": 550 }, { "epoch": 2.18, "grad_norm": 4.799998760223389, "learning_rate": 2.661212415958624e-05, "loss": 0.9053, "step": 600 }, { "epoch": 2.55, "grad_norm": 4.3418684005737305, "learning_rate": 2.5454063529829405e-05, "loss": 0.8298, "step": 700 }, { "epoch": 2.91, "grad_norm": 4.906313896179199, "learning_rate": 2.4159718194531573e-05, "loss": 0.7955, "step": 800 }, { "epoch": 3.0, "eval_accuracy": 0.8548707753479126, "eval_loss": 0.5190673470497131, "eval_runtime": 55.308, "eval_samples_per_second": 45.473, "eval_steps_per_second": 0.723, "step": 825 }, { "epoch": 3.27, "grad_norm": 5.728692531585693, "learning_rate": 2.274596192458443e-05, "loss": 0.7404, "step": 900 }, { "epoch": 3.64, "grad_norm": 4.377862453460693, "learning_rate": 2.12312251950283e-05, "loss": 0.7126, "step": 1000 }, { "epoch": 4.0, "grad_norm": 4.678811550140381, "learning_rate": 1.963525491562421e-05, "loss": 0.7056, "step": 1100 }, { "epoch": 4.0, "eval_accuracy": 0.8755467196819086, "eval_loss": 0.45482131838798523, "eval_runtime": 55.04, "eval_samples_per_second": 45.694, "eval_steps_per_second": 0.727, "step": 1100 }, { "epoch": 4.36, "grad_norm": 4.7580246925354, "learning_rate": 1.7978856999686182e-05, "loss": 0.6839, "step": 1200 }, { "epoch": 4.73, "grad_norm": 4.103386878967285, "learning_rate": 1.6283625127182596e-05, "loss": 0.6455, "step": 1300 }, { "epoch": 5.0, "eval_accuracy": 0.8854870775347913, "eval_loss": 0.42557355761528015, "eval_runtime": 55.0116, "eval_samples_per_second": 45.718, "eval_steps_per_second": 0.727, "step": 1375 }, { "epoch": 5.09, "grad_norm": 4.85685396194458, "learning_rate": 1.4571659238094557e-05, "loss": 0.6445, "step": 1400 }, { "epoch": 5.45, "grad_norm": 4.542230606079102, "learning_rate": 1.2865277425900725e-05, "loss": 0.5996, "step": 1500 }, { "epoch": 5.82, "grad_norm": 3.4589812755584717, "learning_rate": 1.11867249870977e-05, "loss": 0.6249, "step": 1600 }, { "epoch": 6.0, "eval_accuracy": 0.8846918489065606, "eval_loss": 0.411356657743454, "eval_runtime": 54.1236, "eval_samples_per_second": 46.468, "eval_steps_per_second": 0.739, "step": 1650 }, { "epoch": 6.18, "grad_norm": 3.62654972076416, "learning_rate": 9.557884419740387e-06, "loss": 0.6076, "step": 1700 }, { "epoch": 6.55, "grad_norm": 4.838754177093506, "learning_rate": 7.999990151614895e-06, "loss": 0.5802, "step": 1800 }, { "epoch": 6.91, "grad_norm": 4.127110004425049, "learning_rate": 6.533351716998466e-06, "loss": 0.5742, "step": 1900 }, { "epoch": 7.0, "eval_accuracy": 0.8874751491053678, "eval_loss": 0.4025728404521942, "eval_runtime": 54.4896, "eval_samples_per_second": 46.156, "eval_steps_per_second": 0.734, "step": 1925 }, { "epoch": 7.27, "grad_norm": 3.6499452590942383, "learning_rate": 5.177088990820725e-06, "loss": 0.5646, "step": 2000 }, { "epoch": 7.64, "grad_norm": 3.194026231765747, "learning_rate": 3.948882931853924e-06, "loss": 0.5627, "step": 2100 }, { "epoch": 8.0, "grad_norm": 4.581117630004883, "learning_rate": 2.86474508437579e-06, "loss": 0.5782, "step": 2200 }, { "epoch": 8.0, "eval_accuracy": 0.890258449304175, "eval_loss": 0.3943153917789459, "eval_runtime": 54.2999, "eval_samples_per_second": 46.317, "eval_steps_per_second": 0.737, "step": 2200 }, { "epoch": 8.36, "grad_norm": 4.3230791091918945, "learning_rate": 1.9388088432033446e-06, "loss": 0.5551, "step": 2300 }, { "epoch": 8.73, "grad_norm": 4.666913032531738, "learning_rate": 1.1831452032772499e-06, "loss": 0.5383, "step": 2400 }, { "epoch": 9.0, "eval_accuracy": 0.8882703777335984, "eval_loss": 0.3928951919078827, "eval_runtime": 54.0056, "eval_samples_per_second": 46.569, "eval_steps_per_second": 0.741, "step": 2475 }, { "epoch": 9.09, "grad_norm": 5.079577922821045, "learning_rate": 6.076053957825411e-07, "loss": 0.5563, "step": 2500 }, { "epoch": 9.45, "grad_norm": 4.677091598510742, "learning_rate": 2.1969246228460526e-07, "loss": 0.5505, "step": 2600 }, { "epoch": 9.82, "grad_norm": 2.785017967224121, "learning_rate": 2.4463441107965278e-08, "loss": 0.5495, "step": 2700 }, { "epoch": 10.0, "eval_accuracy": 0.8878727634194831, "eval_loss": 0.3921276926994324, "eval_runtime": 55.0192, "eval_samples_per_second": 45.711, "eval_steps_per_second": 0.727, "step": 2750 }, { "epoch": 10.0, "step": 2750, "total_flos": 1.3627285673563054e+19, "train_loss": 0.8061258281360973, "train_runtime": 7150.5683, "train_samples_per_second": 24.587, "train_steps_per_second": 0.385 } ], "logging_steps": 100, "max_steps": 2750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.3627285673563054e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }