{ "best_metric": 0.511895477771759, "best_model_checkpoint": "./vit-beta2-0.995/checkpoint-2889", "epoch": 19.0, "eval_steps": 500, "global_step": 6099, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 30.705322265625, "learning_rate": 1.8234275822273514e-05, "loss": 1.7709, "step": 321 }, { "epoch": 1.0, "eval_accuracy": 0.7038834951456311, "eval_f1": 0.6317206539589807, "eval_loss": 0.9409339427947998, "eval_precision": 0.6600572679415466, "eval_recall": 0.7038834951456311, "eval_runtime": 22.6099, "eval_samples_per_second": 127.555, "eval_steps_per_second": 15.966, "step": 321 }, { "epoch": 2.0, "grad_norm": 38.1316032409668, "learning_rate": 3.675706866705136e-05, "loss": 1.1633, "step": 642 }, { "epoch": 2.0, "eval_accuracy": 0.7371705963938974, "eval_f1": 0.6969962032458188, "eval_loss": 0.7317262887954712, "eval_precision": 0.7193055769194622, "eval_recall": 0.7371705963938974, "eval_runtime": 22.4171, "eval_samples_per_second": 128.652, "eval_steps_per_second": 16.104, "step": 642 }, { "epoch": 3.0, "grad_norm": 10.181236267089844, "learning_rate": 5.52798615118292e-05, "loss": 1.0429, "step": 963 }, { "epoch": 3.0, "eval_accuracy": 0.7624826629680999, "eval_f1": 0.7239854360368981, "eval_loss": 0.6350400447845459, "eval_precision": 0.7357005373068777, "eval_recall": 0.7624826629680999, "eval_runtime": 22.621, "eval_samples_per_second": 127.492, "eval_steps_per_second": 15.959, "step": 963 }, { "epoch": 4.0, "grad_norm": 36.46014404296875, "learning_rate": 7.380265435660705e-05, "loss": 0.9649, "step": 1284 }, { "epoch": 4.0, "eval_accuracy": 0.7694174757281553, "eval_f1": 0.78081186335, "eval_loss": 0.5759614706039429, "eval_precision": 0.803793715931661, "eval_recall": 0.7694174757281553, "eval_runtime": 22.7229, "eval_samples_per_second": 126.92, "eval_steps_per_second": 15.887, "step": 1284 }, { "epoch": 5.0, "grad_norm": 8.231016159057617, "learning_rate": 9.232544720138489e-05, "loss": 0.9051, "step": 1605 }, { "epoch": 5.0, "eval_accuracy": 0.7669902912621359, "eval_f1": 0.77317554088632, "eval_loss": 0.6440545320510864, "eval_precision": 0.7941009873640001, "eval_recall": 0.7669902912621359, "eval_runtime": 22.7099, "eval_samples_per_second": 126.993, "eval_steps_per_second": 15.896, "step": 1605 }, { "epoch": 6.0, "grad_norm": 7.757928848266602, "learning_rate": 9.984876489938473e-05, "loss": 0.9826, "step": 1926 }, { "epoch": 6.0, "eval_accuracy": 0.7850208044382802, "eval_f1": 0.7892025324025785, "eval_loss": 0.5661589503288269, "eval_precision": 0.7956162433005612, "eval_recall": 0.7850208044382802, "eval_runtime": 22.6328, "eval_samples_per_second": 127.425, "eval_steps_per_second": 15.95, "step": 1926 }, { "epoch": 7.0, "grad_norm": 11.347550392150879, "learning_rate": 9.889494151200358e-05, "loss": 0.8855, "step": 2247 }, { "epoch": 7.0, "eval_accuracy": 0.7264216366158114, "eval_f1": 0.7458569130340702, "eval_loss": 0.6881958246231079, "eval_precision": 0.7937071962372872, "eval_recall": 0.7264216366158114, "eval_runtime": 22.7206, "eval_samples_per_second": 126.933, "eval_steps_per_second": 15.889, "step": 2247 }, { "epoch": 8.0, "grad_norm": 2.9686567783355713, "learning_rate": 9.707962612088379e-05, "loss": 0.789, "step": 2568 }, { "epoch": 8.0, "eval_accuracy": 0.7364771151178918, "eval_f1": 0.7563577225180272, "eval_loss": 0.6491453051567078, "eval_precision": 0.8089248672404397, "eval_recall": 0.7364771151178918, "eval_runtime": 22.3088, "eval_samples_per_second": 129.277, "eval_steps_per_second": 16.182, "step": 2568 }, { "epoch": 9.0, "grad_norm": 6.504317760467529, "learning_rate": 9.443480321450928e-05, "loss": 0.7192, "step": 2889 }, { "epoch": 9.0, "eval_accuracy": 0.8075589459084604, "eval_f1": 0.8098127959154342, "eval_loss": 0.511895477771759, "eval_precision": 0.8207082348032128, "eval_recall": 0.8075589459084604, "eval_runtime": 22.6645, "eval_samples_per_second": 127.247, "eval_steps_per_second": 15.928, "step": 2889 }, { "epoch": 10.0, "grad_norm": 12.737798690795898, "learning_rate": 9.100707257835249e-05, "loss": 0.7012, "step": 3210 }, { "epoch": 10.0, "eval_accuracy": 0.79750346740638, "eval_f1": 0.807690598198484, "eval_loss": 0.5413523316383362, "eval_precision": 0.8340905013524049, "eval_recall": 0.79750346740638, "eval_runtime": 22.8925, "eval_samples_per_second": 125.98, "eval_steps_per_second": 15.769, "step": 3210 }, { "epoch": 11.0, "grad_norm": 33.7946891784668, "learning_rate": 8.685682824178951e-05, "loss": 0.6376, "step": 3531 }, { "epoch": 11.0, "eval_accuracy": 0.7947295423023578, "eval_f1": 0.8066140477733247, "eval_loss": 0.5712208151817322, "eval_precision": 0.8331612477677219, "eval_recall": 0.7947295423023578, "eval_runtime": 22.3907, "eval_samples_per_second": 128.804, "eval_steps_per_second": 16.123, "step": 3531 }, { "epoch": 12.0, "grad_norm": 3.9330718517303467, "learning_rate": 8.205719438083829e-05, "loss": 0.5412, "step": 3852 }, { "epoch": 12.0, "eval_accuracy": 0.8058252427184466, "eval_f1": 0.8145494176055466, "eval_loss": 0.5660970211029053, "eval_precision": 0.8327663750154051, "eval_recall": 0.8058252427184466, "eval_runtime": 23.2238, "eval_samples_per_second": 124.183, "eval_steps_per_second": 15.544, "step": 3852 }, { "epoch": 13.0, "grad_norm": 15.180066108703613, "learning_rate": 7.669273692531118e-05, "loss": 0.4667, "step": 4173 }, { "epoch": 13.0, "eval_accuracy": 0.819001386962552, "eval_f1": 0.8179067223293143, "eval_loss": 0.6374972462654114, "eval_precision": 0.8409999751324982, "eval_recall": 0.819001386962552, "eval_runtime": 22.8695, "eval_samples_per_second": 126.107, "eval_steps_per_second": 15.785, "step": 4173 }, { "epoch": 14.0, "grad_norm": 2.8769447803497314, "learning_rate": 7.085797357089247e-05, "loss": 0.4766, "step": 4494 }, { "epoch": 14.0, "eval_accuracy": 0.8252427184466019, "eval_f1": 0.83131443421793, "eval_loss": 0.5736179351806641, "eval_precision": 0.850771343089076, "eval_recall": 0.8252427184466019, "eval_runtime": 22.5348, "eval_samples_per_second": 127.98, "eval_steps_per_second": 16.02, "step": 4494 }, { "epoch": 15.0, "grad_norm": 3.6918177604675293, "learning_rate": 6.46557084486047e-05, "loss": 0.384, "step": 4815 }, { "epoch": 15.0, "eval_accuracy": 0.8356449375866851, "eval_f1": 0.8371244710047189, "eval_loss": 0.5305333137512207, "eval_precision": 0.8414602896223483, "eval_recall": 0.8356449375866851, "eval_runtime": 22.1675, "eval_samples_per_second": 130.1, "eval_steps_per_second": 16.285, "step": 4815 }, { "epoch": 16.0, "grad_norm": 7.413918495178223, "learning_rate": 5.8195220793532045e-05, "loss": 0.37, "step": 5136 }, { "epoch": 16.0, "eval_accuracy": 0.8314840499306518, "eval_f1": 0.8379252493969135, "eval_loss": 0.5530928373336792, "eval_precision": 0.8498567684865045, "eval_recall": 0.8314840499306518, "eval_runtime": 22.2316, "eval_samples_per_second": 129.725, "eval_steps_per_second": 16.238, "step": 5136 }, { "epoch": 17.0, "grad_norm": 8.914341926574707, "learning_rate": 5.15903395270923e-05, "loss": 0.2809, "step": 5457 }, { "epoch": 17.0, "eval_accuracy": 0.8637309292649098, "eval_f1": 0.860814136748839, "eval_loss": 0.5173911452293396, "eval_precision": 0.8629791065379673, "eval_recall": 0.8637309292649098, "eval_runtime": 22.2757, "eval_samples_per_second": 129.468, "eval_steps_per_second": 16.206, "step": 5457 }, { "epoch": 18.0, "grad_norm": 18.052043914794922, "learning_rate": 4.495743767726598e-05, "loss": 0.2681, "step": 5778 }, { "epoch": 18.0, "eval_accuracy": 0.8477808599167822, "eval_f1": 0.8504360089943235, "eval_loss": 0.5555988550186157, "eval_precision": 0.8554685811338774, "eval_recall": 0.8477808599167822, "eval_runtime": 22.3572, "eval_samples_per_second": 128.997, "eval_steps_per_second": 16.147, "step": 5778 }, { "epoch": 19.0, "grad_norm": 0.04609627276659012, "learning_rate": 3.841338197358591e-05, "loss": 0.2139, "step": 6099 }, { "epoch": 19.0, "eval_accuracy": 0.8255894590846047, "eval_f1": 0.8335016397329735, "eval_loss": 0.6290740966796875, "eval_precision": 0.8517909910468258, "eval_recall": 0.8255894590846047, "eval_runtime": 22.1886, "eval_samples_per_second": 129.977, "eval_steps_per_second": 16.27, "step": 6099 }, { "epoch": 19.0, "step": 6099, "total_flos": 7.550537882380222e+18, "train_loss": 0.713874793126947, "train_runtime": 2273.3603, "train_samples_per_second": 225.569, "train_steps_per_second": 14.12 } ], "logging_steps": 500, "max_steps": 32100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 7.550537882380222e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }