|
{ |
|
"best_metric": 0.3921276926994324, |
|
"best_model_checkpoint": "./vit-base-3e-5-randaug/checkpoint-2750", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.155110836029053, |
|
"learning_rate": 2.9902226030228252e-05, |
|
"loss": 2.7863, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 4.886130332946777, |
|
"learning_rate": 2.9610178754135005e-05, |
|
"loss": 1.6815, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7737574552683897, |
|
"eval_loss": 0.9074916839599609, |
|
"eval_runtime": 54.9169, |
|
"eval_samples_per_second": 45.796, |
|
"eval_steps_per_second": 0.728, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 4.915349960327148, |
|
"learning_rate": 2.9127665454592872e-05, |
|
"loss": 1.2844, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 4.22341775894165, |
|
"learning_rate": 2.8460976430370375e-05, |
|
"loss": 1.06, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 4.15113639831543, |
|
"learning_rate": 2.7618802992467718e-05, |
|
"loss": 0.9759, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8500994035785289, |
|
"eval_loss": 0.5867449641227722, |
|
"eval_runtime": 54.5476, |
|
"eval_samples_per_second": 46.106, |
|
"eval_steps_per_second": 0.733, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 4.799998760223389, |
|
"learning_rate": 2.661212415958624e-05, |
|
"loss": 0.9053, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 4.3418684005737305, |
|
"learning_rate": 2.5454063529829405e-05, |
|
"loss": 0.8298, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 4.906313896179199, |
|
"learning_rate": 2.4159718194531573e-05, |
|
"loss": 0.7955, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8548707753479126, |
|
"eval_loss": 0.5190673470497131, |
|
"eval_runtime": 55.308, |
|
"eval_samples_per_second": 45.473, |
|
"eval_steps_per_second": 0.723, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 5.728692531585693, |
|
"learning_rate": 2.274596192458443e-05, |
|
"loss": 0.7404, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 4.377862453460693, |
|
"learning_rate": 2.12312251950283e-05, |
|
"loss": 0.7126, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.678811550140381, |
|
"learning_rate": 1.963525491562421e-05, |
|
"loss": 0.7056, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8755467196819086, |
|
"eval_loss": 0.45482131838798523, |
|
"eval_runtime": 55.04, |
|
"eval_samples_per_second": 45.694, |
|
"eval_steps_per_second": 0.727, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 4.7580246925354, |
|
"learning_rate": 1.7978856999686182e-05, |
|
"loss": 0.6839, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"grad_norm": 4.103386878967285, |
|
"learning_rate": 1.6283625127182596e-05, |
|
"loss": 0.6455, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8854870775347913, |
|
"eval_loss": 0.42557355761528015, |
|
"eval_runtime": 55.0116, |
|
"eval_samples_per_second": 45.718, |
|
"eval_steps_per_second": 0.727, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 4.85685396194458, |
|
"learning_rate": 1.4571659238094557e-05, |
|
"loss": 0.6445, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"grad_norm": 4.542230606079102, |
|
"learning_rate": 1.2865277425900725e-05, |
|
"loss": 0.5996, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"grad_norm": 3.4589812755584717, |
|
"learning_rate": 1.11867249870977e-05, |
|
"loss": 0.6249, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8846918489065606, |
|
"eval_loss": 0.411356657743454, |
|
"eval_runtime": 54.1236, |
|
"eval_samples_per_second": 46.468, |
|
"eval_steps_per_second": 0.739, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"grad_norm": 3.62654972076416, |
|
"learning_rate": 9.557884419740387e-06, |
|
"loss": 0.6076, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"grad_norm": 4.838754177093506, |
|
"learning_rate": 7.999990151614895e-06, |
|
"loss": 0.5802, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"grad_norm": 4.127110004425049, |
|
"learning_rate": 6.533351716998466e-06, |
|
"loss": 0.5742, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8874751491053678, |
|
"eval_loss": 0.4025728404521942, |
|
"eval_runtime": 54.4896, |
|
"eval_samples_per_second": 46.156, |
|
"eval_steps_per_second": 0.734, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"grad_norm": 3.6499452590942383, |
|
"learning_rate": 5.177088990820725e-06, |
|
"loss": 0.5646, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"grad_norm": 3.194026231765747, |
|
"learning_rate": 3.948882931853924e-06, |
|
"loss": 0.5627, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.581117630004883, |
|
"learning_rate": 2.86474508437579e-06, |
|
"loss": 0.5782, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.890258449304175, |
|
"eval_loss": 0.3943153917789459, |
|
"eval_runtime": 54.2999, |
|
"eval_samples_per_second": 46.317, |
|
"eval_steps_per_second": 0.737, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"grad_norm": 4.3230791091918945, |
|
"learning_rate": 1.9388088432033446e-06, |
|
"loss": 0.5551, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"grad_norm": 4.666913032531738, |
|
"learning_rate": 1.1831452032772499e-06, |
|
"loss": 0.5383, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8882703777335984, |
|
"eval_loss": 0.3928951919078827, |
|
"eval_runtime": 54.0056, |
|
"eval_samples_per_second": 46.569, |
|
"eval_steps_per_second": 0.741, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 5.079577922821045, |
|
"learning_rate": 6.076053957825411e-07, |
|
"loss": 0.5563, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"grad_norm": 4.677091598510742, |
|
"learning_rate": 2.1969246228460526e-07, |
|
"loss": 0.5505, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 2.785017967224121, |
|
"learning_rate": 2.4463441107965278e-08, |
|
"loss": 0.5495, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8878727634194831, |
|
"eval_loss": 0.3921276926994324, |
|
"eval_runtime": 55.0192, |
|
"eval_samples_per_second": 45.711, |
|
"eval_steps_per_second": 0.727, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2750, |
|
"total_flos": 1.3627285673563054e+19, |
|
"train_loss": 0.8061258281360973, |
|
"train_runtime": 7150.5683, |
|
"train_samples_per_second": 24.587, |
|
"train_steps_per_second": 0.385 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.3627285673563054e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|